diff --git a/.asf.yaml b/.asf.yaml index 3935a525ff3c4..296aaff5c4a2d 100644 --- a/.asf.yaml +++ b/.asf.yaml @@ -13,7 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# https://cwiki.apache.org/confluence/display/INFRA/git+-+.asf.yaml+features +# https://github.com/apache/infrastructure-asfyaml/blob/main/README.md --- github: description: "Apache Spark - A unified analytics engine for large-scale data processing" diff --git a/.github/workflows/benchmark.yml b/.github/workflows/benchmark.yml index 6b2e72b3f23be..3e90bb329be56 100644 --- a/.github/workflows/benchmark.yml +++ b/.github/workflows/benchmark.yml @@ -50,6 +50,11 @@ on: description: 'Number of job splits' required: true default: '1' + create-commit: + type: boolean + description: 'Commit the benchmark results to the current branch' + required: true + default: false jobs: matrix-gen: @@ -195,10 +200,31 @@ jobs: # To keep the directory structure and file permissions, tar them # See also https://github.com/actions/upload-artifact#maintaining-file-permissions-and-case-sensitive-files echo "Preparing the benchmark results:" - tar -cvf benchmark-results-${{ inputs.jdk }}-${{ inputs.scala }}.tar `git diff --name-only` `git ls-files --others --exclude=tpcds-sf-1 --exclude=tpcds-sf-1-text --exclude-standard` + tar -cvf target/benchmark-results-${{ inputs.jdk }}-${{ inputs.scala }}.tar `git diff --name-only` `git ls-files --others --exclude=tpcds-sf-1 --exclude=tpcds-sf-1-text --exclude-standard` + - name: Create a pull request with the results + if: ${{ inputs.create-commit && success() }} + run: | + git config --local user.name "${{ github.actor }}" + git config --local user.email "${{ github.event.pusher.email || format('{0}@users.noreply.github.com', github.actor) }}" + git add -A + git commit -m "Benchmark results for ${{ inputs.class }} (JDK ${{ inputs.jdk }}, Scala ${{ inputs.scala }}, split ${{ matrix.split }} of ${{ inputs.num-splits }})" + for i in {1..5}; do + echo "Attempt $i to push..." + git fetch origin ${{ github.ref_name }} + git rebase origin/${{ github.ref_name }} + if git push origin ${{ github.ref_name }}:${{ github.ref_name }}; then + echo "Push successful." + exit 0 + else + echo "Push failed, retrying in 3 seconds..." + sleep 3 + fi + done + echo "Error: Failed to push after 5 attempts." 
+ exit 1 - name: Upload benchmark results uses: actions/upload-artifact@v4 with: name: benchmark-results-${{ inputs.jdk }}-${{ inputs.scala }}-${{ matrix.split }} - path: benchmark-results-${{ inputs.jdk }}-${{ inputs.scala }}.tar + path: target/benchmark-results-${{ inputs.jdk }}-${{ inputs.scala }}.tar diff --git a/.github/workflows/build_and_test.yml b/.github/workflows/build_and_test.yml index ff005103a2461..b54a382dac053 100644 --- a/.github/workflows/build_and_test.yml +++ b/.github/workflows/build_and_test.yml @@ -112,7 +112,7 @@ jobs: ui=false docs=false fi - build=`./dev/is-changed.py -m "core,unsafe,kvstore,avro,utils,network-common,network-shuffle,repl,launcher,examples,sketch,variant,api,catalyst,hive-thriftserver,mllib-local,mllib,graphx,streaming,sql-kafka-0-10,streaming-kafka-0-10,streaming-kinesis-asl,kubernetes,hadoop-cloud,spark-ganglia-lgpl,profiler,protobuf,yarn,connect,sql,hive,pipelines"` + build=`./dev/is-changed.py -m "core,unsafe,kvstore,avro,utils,utils-java,network-common,network-shuffle,repl,launcher,examples,sketch,variant,api,catalyst,hive-thriftserver,mllib-local,mllib,graphx,streaming,sql-kafka-0-10,streaming-kafka-0-10,streaming-kinesis-asl,kubernetes,hadoop-cloud,spark-ganglia-lgpl,profiler,protobuf,yarn,connect,sql,hive,pipelines"` precondition=" { \"build\": \"$build\", @@ -122,6 +122,8 @@ jobs: \"tpcds-1g\": \"$tpcds\", \"docker-integration-tests\": \"$docker\", \"lint\" : \"true\", + \"java17\" : \"$build\", + \"java25\" : \"$build\", \"docs\" : \"$docs\", \"yarn\" : \"$yarn\", \"k8s-integration-tests\" : \"$kubernetes\", @@ -240,7 +242,7 @@ jobs: # Note that the modules below are from sparktestsupport/modules.py. modules: - >- - core, unsafe, kvstore, avro, utils, + core, unsafe, kvstore, avro, utils, utils-java, network-common, network-shuffle, repl, launcher, examples, sketch, variant - >- @@ -360,7 +362,7 @@ jobs: - name: Install Python packages (Python 3.11) if: (contains(matrix.modules, 'sql') && !contains(matrix.modules, 'sql-')) || contains(matrix.modules, 'connect') || contains(matrix.modules, 'yarn') run: | - python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' + python3.11 -m pip install 'numpy>=1.22' pyarrow pandas pyyaml scipy unittest-xml-reporting 'lxml==4.9.4' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' python3.11 -m pip list # Run the tests. 
- name: Run tests @@ -512,6 +514,8 @@ jobs: pyspark-core, pyspark-errors, pyspark-streaming, pyspark-logger - >- pyspark-mllib, pyspark-ml, pyspark-ml-connect, pyspark-pipelines + - >- + pyspark-structured-streaming, pyspark-structured-streaming-connect - >- pyspark-connect - >- @@ -519,13 +523,9 @@ jobs: - >- pyspark-pandas-slow - >- - pyspark-pandas-connect-part0 - - >- - pyspark-pandas-connect-part1 - - >- - pyspark-pandas-connect-part2 + pyspark-pandas-connect - >- - pyspark-pandas-connect-part3 + pyspark-pandas-slow-connect exclude: # Always run if pyspark == 'true', even infra-image is skip (such as non-master job) # In practice, the build will run in individual PR, but not against the individual commit @@ -533,16 +533,15 @@ jobs: - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark != 'true' && 'pyspark-sql, pyspark-resource, pyspark-testing' }} - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark != 'true' && 'pyspark-core, pyspark-errors, pyspark-streaming, pyspark-logger' }} - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark != 'true' && 'pyspark-mllib, pyspark-ml, pyspark-ml-connect' }} + - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark != 'true' && 'pyspark-structured-streaming, pyspark-structured-streaming-connect' }} - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark != 'true' && 'pyspark-connect' }} # Always run if pyspark-pandas == 'true', even infra-image is skip (such as non-master job) # In practice, the build will run in individual PR, but not against the individual commit # in Apache Spark repository. - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas' }} - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-slow' }} - - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-connect-part0' }} - - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-connect-part1' }} - - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-connect-part2' }} - - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-connect-part3' }} + - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-connect' }} + - modules: ${{ fromJson(needs.precondition.outputs.required).pyspark-pandas != 'true' && 'pyspark-pandas-slow-connect' }} env: MODULES_TO_TEST: ${{ matrix.modules }} HADOOP_PROFILE: ${{ inputs.hadoop }} @@ -605,8 +604,9 @@ jobs: run: | for py in $(echo $PYTHON_TO_TEST | tr "," "\n") do - echo $py + $py --version $py -m pip list + echo "" done - name: Install Conda for pip packaging test if: contains(matrix.modules, 'pyspark-errors') @@ -766,7 +766,7 @@ jobs: python-version: '3.11' - name: Install dependencies for Python CodeGen check run: | - python3.11 -m pip install 'black==23.12.1' 'protobuf==5.29.1' 'mypy==1.8.0' 'mypy-protobuf==3.3.0' + python3.11 -m pip install 'black==23.12.1' 'protobuf==5.29.5' 'mypy==1.8.0' 'mypy-protobuf==3.3.0' python3.11 -m pip list - name: Python CodeGen check for branch-3.5 if: inputs.branch == 'branch-3.5' @@ -919,6 +919,42 @@ jobs: - name: R linter run: ./dev/lint-r + java17: + needs: [precondition] + if: fromJson(needs.precondition.outputs.required).java17 == 'true' + name: Java 17 build with Maven + runs-on: ubuntu-latest + 
timeout-minutes: 120 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-java@v4 + with: + distribution: zulu + java-version: 17 + - name: Build with Maven + run: | + export MAVEN_OPTS="-Xss64m -Xmx4g -Xms4g -XX:ReservedCodeCacheSize=128m -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" + export MAVEN_CLI_OPTS="--no-transfer-progress" + ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl clean install + + java25: + needs: [precondition] + if: fromJson(needs.precondition.outputs.required).java25 == 'true' + name: Java 25 build with Maven + runs-on: ubuntu-latest + timeout-minutes: 120 + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-java@v4 + with: + distribution: zulu + java-version: 25 + - name: Build with Maven + run: | + export MAVEN_OPTS="-Xss64m -Xmx4g -Xms4g -XX:ReservedCodeCacheSize=128m -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" + export MAVEN_CLI_OPTS="--no-transfer-progress" + ./build/mvn $MAVEN_CLI_OPTS -DskipTests -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl clean install + # Documentation build docs: needs: [precondition, infra-image] @@ -998,10 +1034,14 @@ jobs: # Should unpin 'sphinxcontrib-*' after upgrading sphinx>5 python3.9 -m pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' python3.9 -m pip install ipython_genutils # See SPARK-38517 - python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.20.0' pyarrow pandas 'plotly<6.0.0' + python3.9 -m pip install sphinx_plotly_directive 'numpy>=1.22' pyarrow pandas 'plotly<6.0.0' python3.9 -m pip install 'docutils<0.18.0' # See SPARK-39421 - - name: List Python packages + - name: List Python packages for branch-3.5 and branch-4.0 + if: inputs.branch == 'branch-3.5' || inputs.branch == 'branch-4.0' run: python3.9 -m pip list + - name: List Python packages + if: inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0' + run: python3.11 -m pip list - name: Install dependencies for documentation generation run: | # Keep the version of Bundler here in sync with the following locations: @@ -1010,7 +1050,8 @@ jobs: gem install bundler -v 2.4.22 cd docs bundle install --retry=100 - - name: Run documentation build + - name: Run documentation build for branch-3.5 and branch-4.0 + if: inputs.branch == 'branch-3.5' || inputs.branch == 'branch-4.0' run: | # We need this link to make sure `python3` points to `python3.9` which contains the prerequisite packages. ln -s "$(which python3.9)" "/usr/local/bin/python3" @@ -1031,6 +1072,30 @@ jobs: echo "SKIP_SQLDOC: $SKIP_SQLDOC" cd docs bundle exec jekyll build + - name: Run documentation build + if: inputs.branch != 'branch-3.5' && inputs.branch != 'branch-4.0' + run: | + # We need this link to make sure `python3` points to `python3.11` which contains the prerequisite packages. + ln -s "$(which python3.11)" "/usr/local/bin/python3" + # Build docs first with SKIP_API to ensure they are buildable without requiring any + # language docs to be built beforehand. + cd docs; SKIP_ERRORDOC=1 SKIP_API=1 bundle exec jekyll build; cd .. 
+ if [ -f "./dev/is-changed.py" ]; then + # Skip PySpark and SparkR docs while keeping Scala/Java/SQL docs + pyspark_modules=`cd dev && python3.11 -c "import sparktestsupport.modules as m; print(','.join(m.name for m in m.all_modules if m.name.startswith('pyspark')))"` + if [ `./dev/is-changed.py -m $pyspark_modules` = false ]; then export SKIP_PYTHONDOC=1; fi + if [ `./dev/is-changed.py -m sparkr` = false ]; then export SKIP_RDOC=1; fi + fi + export PYSPARK_DRIVER_PYTHON=python3.11 + export PYSPARK_PYTHON=python3.11 + # Print the values of environment variables `SKIP_ERRORDOC`, `SKIP_SCALADOC`, `SKIP_PYTHONDOC`, `SKIP_RDOC` and `SKIP_SQLDOC` + echo "SKIP_ERRORDOC: $SKIP_ERRORDOC" + echo "SKIP_SCALADOC: $SKIP_SCALADOC" + echo "SKIP_PYTHONDOC: $SKIP_PYTHONDOC" + echo "SKIP_RDOC: $SKIP_RDOC" + echo "SKIP_SQLDOC: $SKIP_SQLDOC" + cd docs + bundle exec jekyll build - name: Tar documentation if: github.repository != 'apache/spark' run: tar cjf site.tar.bz2 docs/_site @@ -1259,9 +1324,9 @@ jobs: sudo apt update sudo apt-get install r-base - name: Start Minikube - uses: medyagh/setup-minikube@v0.0.19 + uses: medyagh/setup-minikube@v0.0.20 with: - kubernetes-version: "1.33.0" + kubernetes-version: "1.34.0" # Github Action limit cpu:2, memory: 6947MB, limit to 2U6G for better resource statistic cpus: 2 memory: 6144m @@ -1279,8 +1344,10 @@ jobs: kubectl create clusterrolebinding serviceaccounts-cluster-admin --clusterrole=cluster-admin --group=system:serviceaccounts || true if [[ "${{ inputs.branch }}" == 'branch-3.5' ]]; then kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.7.0/installer/volcano-development.yaml || true - else + elif [[ "${{ inputs.branch }}" == 'branch-4.0' ]]; then kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.11.0/installer/volcano-development.yaml || true + else + kubectl apply -f https://raw.githubusercontent.com/volcano-sh/volcano/v1.12.2/installer/volcano-development.yaml || true fi eval $(minikube docker-env) build/sbt -Phadoop-3 -Psparkr -Pkubernetes -Pvolcano -Pkubernetes-integration-tests -Dspark.kubernetes.test.volcanoMaxConcurrencyJobNum=1 -Dtest.exclude.tags=local "kubernetes-integration-tests/test" diff --git a/.github/workflows/build_infra_images_cache.yml b/.github/workflows/build_infra_images_cache.yml index ccd47826ff099..430903b570ea0 100644 --- a/.github/workflows/build_infra_images_cache.yml +++ b/.github/workflows/build_infra_images_cache.yml @@ -33,13 +33,13 @@ on: - 'dev/spark-test-image/python-minimum/Dockerfile' - 'dev/spark-test-image/python-ps-minimum/Dockerfile' - 'dev/spark-test-image/pypy-310/Dockerfile' - - 'dev/spark-test-image/python-309/Dockerfile' - 'dev/spark-test-image/python-310/Dockerfile' - 'dev/spark-test-image/python-311/Dockerfile' - 'dev/spark-test-image/python-311-classic-only/Dockerfile' - 'dev/spark-test-image/python-312/Dockerfile' - 'dev/spark-test-image/python-313/Dockerfile' - 'dev/spark-test-image/python-313-nogil/Dockerfile' + - 'dev/spark-test-image/python-314/Dockerfile' - 'dev/spark-test-image/numpy-213/Dockerfile' - '.github/workflows/build_infra_images_cache.yml' # Create infra image when cutting down branches/tags @@ -153,19 +153,6 @@ jobs: - name: Image digest (PySpark with PyPy 3.10) if: hashFiles('dev/spark-test-image/pypy-310/Dockerfile') != '' run: echo ${{ steps.docker_build_pyspark_pypy_310.outputs.digest }} - - name: Build and push (PySpark with Python 3.9) - if: hashFiles('dev/spark-test-image/python-309/Dockerfile') != '' - id: docker_build_pyspark_python_309 - 
uses: docker/build-push-action@v6 - with: - context: ./dev/spark-test-image/python-309/ - push: true - tags: ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-309-cache:${{ github.ref_name }}-static - cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-309-cache:${{ github.ref_name }} - cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-309-cache:${{ github.ref_name }},mode=max - - name: Image digest (PySpark with Python 3.9) - if: hashFiles('dev/spark-test-image/python-309/Dockerfile') != '' - run: echo ${{ steps.docker_build_pyspark_python_309.outputs.digest }} - name: Build and push (PySpark with Python 3.10) if: hashFiles('dev/spark-test-image/python-310/Dockerfile') != '' id: docker_build_pyspark_python_310 @@ -244,6 +231,19 @@ jobs: - name: Image digest (PySpark with Python 3.13 no GIL) if: hashFiles('dev/spark-test-image/python-313-nogil/Dockerfile') != '' run: echo ${{ steps.docker_build_pyspark_python_313_nogil.outputs.digest }} + - name: Build and push (PySpark with Python 3.14) + if: hashFiles('dev/spark-test-image/python-314/Dockerfile') != '' + id: docker_build_pyspark_python_314 + uses: docker/build-push-action@v6 + with: + context: ./dev/spark-test-image/python-314/ + push: true + tags: ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-314-cache:${{ github.ref_name }}-static + cache-from: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-314-cache:${{ github.ref_name }} + cache-to: type=registry,ref=ghcr.io/apache/spark/apache-spark-github-action-image-pyspark-python-314-cache:${{ github.ref_name }},mode=max + - name: Image digest (PySpark with Python 3.14) + if: hashFiles('dev/spark-test-image/python-314/Dockerfile') != '' + run: echo ${{ steps.docker_build_pyspark_python_314.outputs.digest }} - name: Build and push (PySpark with Numpy 2.1.3) if: hashFiles('dev/spark-test-image/numpy-213/Dockerfile') != '' id: docker_build_pyspark_numpy_213 diff --git a/.github/workflows/build_maven_java21_arm.yml b/.github/workflows/build_maven_java21_arm.yml index 505bdd63189c0..16417bb1c5f22 100644 --- a/.github/workflows/build_maven_java21_arm.yml +++ b/.github/workflows/build_maven_java21_arm.yml @@ -21,7 +21,7 @@ name: "Build / Maven (master, Scala 2.13, Hadoop 3, JDK 21, ARM)" on: schedule: - - cron: '0 15 * * *' + - cron: '0 15 */2 * *' workflow_dispatch: jobs: diff --git a/.github/workflows/build_maven_java21_macos15.yml b/.github/workflows/build_maven_java21_macos26.yml similarity index 98% rename from .github/workflows/build_maven_java21_macos15.yml rename to .github/workflows/build_maven_java21_macos26.yml index 14db1b1871bc4..c858a7f70b270 100644 --- a/.github/workflows/build_maven_java21_macos15.yml +++ b/.github/workflows/build_maven_java21_macos26.yml @@ -17,7 +17,7 @@ # under the License. 
# -name: "Build / Maven (master, Scala 2.13, Hadoop 3, JDK 21, MacOS-15)" +name: "Build / Maven (master, Scala 2.13, Hadoop 3, JDK 21, MacOS-26)" on: schedule: @@ -33,7 +33,7 @@ jobs: if: github.repository == 'apache/spark' with: java: 21 - os: macos-15 + os: macos-26 arch: arm64 envs: >- { diff --git a/.github/workflows/build_non_ansi.yml b/.github/workflows/build_non_ansi.yml index 547a227e61d7e..debdaf4f8709d 100644 --- a/.github/workflows/build_non_ansi.yml +++ b/.github/workflows/build_non_ansi.yml @@ -40,6 +40,7 @@ jobs: "PYSPARK_IMAGE_TO_TEST": "python-311", "PYTHON_TO_TEST": "python3.11", "SPARK_ANSI_SQL_MODE": "false", + "SPARK_TEST_SPARK_BLOOM_FILTER_SUITE_ENABLED": "true" } jobs: >- { diff --git a/.github/workflows/build_python_3.9.yml b/.github/workflows/build_python_3.14.yml similarity index 89% rename from .github/workflows/build_python_3.9.yml rename to .github/workflows/build_python_3.14.yml index 0df17699140ed..45ea43f1d491c 100644 --- a/.github/workflows/build_python_3.9.yml +++ b/.github/workflows/build_python_3.14.yml @@ -17,7 +17,7 @@ # under the License. # -name: "Build / Python-only (master, Python 3.9)" +name: "Build / Python-only (master, Python 3.14)" on: schedule: @@ -37,8 +37,8 @@ jobs: hadoop: hadoop3 envs: >- { - "PYSPARK_IMAGE_TO_TEST": "python-309", - "PYTHON_TO_TEST": "python3.9" + "PYSPARK_IMAGE_TO_TEST": "python-314", + "PYTHON_TO_TEST": "python3.14" } jobs: >- { diff --git a/.github/workflows/build_python_connect.yml b/.github/workflows/build_python_connect.yml index 8d82ff192ab07..5edb54de82b6d 100644 --- a/.github/workflows/build_python_connect.yml +++ b/.github/workflows/build_python_connect.yml @@ -72,7 +72,7 @@ jobs: python packaging/client/setup.py sdist cd dist pip install pyspark*client-*.tar.gz - pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' 'six==1.16.0' 'pandas==2.2.3' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' 'torch<2.6.0' torchvision torcheval deepspeed unittest-xml-reporting + pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' 'six==1.16.0' 'pandas==2.3.2' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' 'graphviz==0.20.3' 'torch<2.6.0' torchvision torcheval deepspeed unittest-xml-reporting - name: List Python packages run: python -m pip list - name: Run tests (local) @@ -96,7 +96,7 @@ jobs: # Several tests related to catalog requires to run them sequencially, e.g., writing a table in a listener. ./python/run-tests --parallelism=1 --python-executables=python3 --modules pyspark-connect,pyspark-ml-connect # None of tests are dependent on each other in Pandas API on Spark so run them in parallel - ./python/run-tests --parallelism=1 --python-executables=python3 --modules pyspark-pandas-connect-part0,pyspark-pandas-connect-part1,pyspark-pandas-connect-part2,pyspark-pandas-connect-part3 + ./python/run-tests --parallelism=1 --python-executables=python3 --modules pyspark-pandas-connect,pyspark-pandas-slow-connect # Stop Spark Connect server. 
./sbin/stop-connect-server.sh diff --git a/.github/workflows/build_python_connect35.yml b/.github/workflows/build_python_connect35.yml index e68f288f0184f..0512e33d6cbea 100644 --- a/.github/workflows/build_python_connect35.yml +++ b/.github/workflows/build_python_connect35.yml @@ -68,10 +68,10 @@ jobs: ./build/sbt -Phive Test/package - name: Install Python dependencies run: | - pip install 'numpy==1.25.1' 'pyarrow==12.0.1' 'pandas<=2.0.3' scipy unittest-xml-reporting 'plotly<6.0.0' 'mlflow>=2.3.1' coverage 'matplotlib==3.7.2' openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*' + pip install 'numpy==1.25.1' 'pyarrow>=18.0.0' 'pandas<=2.0.3' scipy unittest-xml-reporting 'plotly<6.0.0' 'mlflow>=2.3.1' coverage 'matplotlib==3.7.2' openpyxl 'memory-profiler==0.60.0' 'scikit-learn==1.1.*' # Add Python deps for Spark Connect. - pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' + pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' # Add torch as a testing dependency for TorchDistributor pip install 'torch==2.0.1' 'torchvision==0.15.2' torcheval diff --git a/.github/workflows/build_python_minimum.yml b/.github/workflows/build_python_minimum.yml index 4e65503006489..3514a82f6217c 100644 --- a/.github/workflows/build_python_minimum.yml +++ b/.github/workflows/build_python_minimum.yml @@ -38,7 +38,7 @@ jobs: envs: >- { "PYSPARK_IMAGE_TO_TEST": "python-minimum", - "PYTHON_TO_TEST": "python3.9" + "PYTHON_TO_TEST": "python3.10" } jobs: >- { diff --git a/.github/workflows/build_python_ps_minimum.yml b/.github/workflows/build_python_ps_minimum.yml index 3aa83ff06a996..ed80a904ebd7f 100644 --- a/.github/workflows/build_python_ps_minimum.yml +++ b/.github/workflows/build_python_ps_minimum.yml @@ -38,7 +38,7 @@ jobs: envs: >- { "PYSPARK_IMAGE_TO_TEST": "python-ps-minimum", - "PYTHON_TO_TEST": "python3.9" + "PYTHON_TO_TEST": "python3.10" } jobs: >- { diff --git a/.github/workflows/build_sparkr_window.yml b/.github/workflows/build_sparkr_window.yml index e3ef9d7ba0752..8bbcdf79bd58b 100644 --- a/.github/workflows/build_sparkr_window.yml +++ b/.github/workflows/build_sparkr_window.yml @@ -16,7 +16,7 @@ # specific language governing permissions and limitations # under the License. 
# -name: "Build / SparkR-only (master, 4.4.3, windows-2022)" +name: "Build / SparkR-only (master, 4.4.3, windows-2025)" on: schedule: @@ -26,7 +26,7 @@ on: jobs: build: name: "Build module: sparkr" - runs-on: windows-2022 + runs-on: windows-2025 timeout-minutes: 120 if: github.repository == 'apache/spark' steps: diff --git a/.github/workflows/maven_test.yml b/.github/workflows/maven_test.yml index e0a5e411571a0..95c9aac33fc6c 100644 --- a/.github/workflows/maven_test.yml +++ b/.github/workflows/maven_test.yml @@ -67,7 +67,7 @@ jobs: - hive2.3 modules: - >- - core,launcher,common#unsafe,common#kvstore,common#network-common,common#network-shuffle,common#sketch,common#utils,common#variant + core,launcher,common#unsafe,common#kvstore,common#network-common,common#network-shuffle,common#sketch,common#utils,common#utils-java,common#variant - >- graphx,streaming,hadoop-cloud - >- @@ -78,19 +78,13 @@ jobs: connector#kafka-0-10,connector#kafka-0-10-sql,connector#kafka-0-10-token-provider,connector#spark-ganglia-lgpl,connector#protobuf,connector#avro,connector#kinesis-asl - >- sql#api,sql#catalyst,resource-managers#yarn,resource-managers#kubernetes#core + - >- + connect # Here, we split Hive and SQL tests into some of slow ones and the rest of them. included-tags: [ "" ] excluded-tags: [ "" ] comment: [ "" ] include: - # Connect tests - - modules: connect - java: ${{ inputs.java }} - hadoop: ${{ inputs.hadoop }} - hive: hive2.3 - # TODO(SPARK-47110): Reenble AmmoniteTest tests in Maven builds - excluded-tags: org.apache.spark.tags.AmmoniteTest - comment: "" # Hive tests - modules: sql#hive java: ${{ inputs.java }} @@ -181,15 +175,24 @@ jobs: - name: Install Python packages (Python 3.11) if: contains(matrix.modules, 'resource-managers#yarn') || (contains(matrix.modules, 'sql#core')) || contains(matrix.modules, 'connect') run: | - python3.11 -m pip install 'numpy>=1.20.0' pyarrow pandas scipy unittest-xml-reporting 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' + python3.11 -m pip install 'numpy>=1.22' pyarrow pandas pyyaml scipy unittest-xml-reporting 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' python3.11 -m pip list - # Run the tests. + # Run the tests using script command. + # BSD's script command doesn't support -c option, and the usage is different from Linux's one. + # The kind of script command is tested by `script -qec true`. - name: Run tests env: ${{ fromJSON(inputs.envs) }} + shell: | + bash -c "if script -qec true 2>/dev/null; then script -qec bash\ {0}; else script -qe /dev/null bash {0}; fi" run: | + # Fix for TTY related issues when launching the Ammonite REPL in tests. 
+ export TERM=vt100 + # `set -e` to make the exit status as expected due to use script command to run the commands + set -e export MAVEN_OPTS="-Xss64m -Xmx4g -Xms4g -XX:ReservedCodeCacheSize=128m -Dorg.slf4j.simpleLogger.defaultLogLevel=WARN" export MAVEN_CLI_OPTS="--no-transfer-progress" export JAVA_VERSION=${{ matrix.java }} + export INPUT_BRANCH=${{ inputs.branch }} export ENABLE_KINESIS_TESTS=0 # Replace with the real module name, for example, connector#kafka-0-10 -> connector/kafka-0-10 export TEST_MODULES=`echo "$MODULES_TO_TEST" | sed -e "s%#%/%g"` @@ -202,13 +205,24 @@ jobs: if [[ "$INCLUDED_TAGS" != "" ]]; then ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.include.tags="$INCLUDED_TAGS" test -fae + elif [[ "$MODULES_TO_TEST" == "connect" && "$INPUT_BRANCH" == "branch-4.0" ]]; then + # SPARK-53914: Remove sql/connect/client/jdbc from `-pl` for branch-4.0, this branch can be deleted after the EOL of branch-4.0. + ./build/mvn $MAVEN_CLI_OPTS -Djava.version=${JAVA_VERSION/-ea} -pl sql/connect/client/jvm,sql/connect/common,sql/connect/server test -fae elif [[ "$MODULES_TO_TEST" == "connect" ]]; then - ./build/mvn $MAVEN_CLI_OPTS -Dtest.exclude.tags="$EXCLUDED_TAGS" -Djava.version=${JAVA_VERSION/-ea} -pl sql/connect/client/jvm,sql/connect/common,sql/connect/server test -fae + ./build/mvn $MAVEN_CLI_OPTS -Djava.version=${JAVA_VERSION/-ea} -pl sql/connect/client/jdbc,sql/connect/client/jvm,sql/connect/common,sql/connect/server test -fae elif [[ "$EXCLUDED_TAGS" != "" ]]; then ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} -Dtest.exclude.tags="$EXCLUDED_TAGS" test -fae elif [[ "$MODULES_TO_TEST" == *"sql#hive-thriftserver"* ]]; then # To avoid a compilation loop, for the `sql/hive-thriftserver` module, run `clean install` instead ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Phadoop-cloud -Pjvm-profiler -Pspark-ganglia-lgpl -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} clean install -fae + elif [[ "$MODULES_TO_TEST" == *"sql#pipelines"* && "$INPUT_BRANCH" == "branch-4.0" ]]; then + # SPARK-52441: Remove sql/pipelines from TEST_MODULES for branch-4.0, this branch can be deleted after the EOL of branch-4.0. + TEST_MODULES=${TEST_MODULES/,sql\/pipelines/} + ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Pjvm-profiler -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} test -fae + elif [[ "$MODULES_TO_TEST" == *"common#utils-java"* && "$INPUT_BRANCH" == "branch-4.0" ]]; then + # SPARK-53138: Remove common/utils-java from TEST_MODULES for branch-4.0, this branch can be deleted after the EOL of branch-4.0. 
+ TEST_MODULES=${TEST_MODULES/,common\/utils-java/} + ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Pjvm-profiler -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} test -fae else ./build/mvn $MAVEN_CLI_OPTS -pl "$TEST_MODULES" -Pyarn -Pkubernetes -Pvolcano -Phive -Phive-thriftserver -Pspark-ganglia-lgpl -Phadoop-cloud -Pjvm-profiler -Pkinesis-asl -Djava.version=${JAVA_VERSION/-ea} test -fae fi diff --git a/.github/workflows/pages.yml b/.github/workflows/pages.yml index 4bcc275064d3c..86ef00220b373 100644 --- a/.github/workflows/pages.yml +++ b/.github/workflows/pages.yml @@ -52,18 +52,18 @@ jobs: with: distribution: zulu java-version: 17 - - name: Install Python 3.9 + - name: Install Python 3.11 uses: actions/setup-python@v5 with: - python-version: '3.9' + python-version: '3.11' architecture: x64 cache: 'pip' - name: Install Python dependencies run: | pip install 'sphinx==4.5.0' mkdocs 'pydata_sphinx_theme>=0.13' sphinx-copybutton nbsphinx numpydoc jinja2 markupsafe 'pyzmq<24.0.0' \ - ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.20.0' pyarrow 'pandas==2.2.3' 'plotly>=4.8' 'docutils<0.18.0' \ + ipython ipython_genutils sphinx_plotly_directive 'numpy>=1.22' pyarrow 'pandas==2.3.2' 'plotly>=4.8' 'docutils<0.18.0' \ 'flake8==3.9.0' 'mypy==1.8.0' 'pytest==7.1.3' 'pytest-mypy-plugins==1.9.3' 'black==23.12.1' \ - 'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \ + 'pandas-stubs==1.2.0.53' 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'grpc-stubs==1.24.11' 'googleapis-common-protos-stubs==2.2.0' \ 'sphinxcontrib-applehelp==1.0.4' 'sphinxcontrib-devhelp==1.0.2' 'sphinxcontrib-htmlhelp==2.0.1' 'sphinxcontrib-qthelp==1.0.3' 'sphinxcontrib-serializinghtml==1.1.5' - name: Install Ruby for documentation generation uses: ruby/setup-ruby@v1 diff --git a/.github/workflows/python_hosted_runner_test.yml b/.github/workflows/python_hosted_runner_test.yml index 9a87c4f7061b0..9a6afc095063c 100644 --- a/.github/workflows/python_hosted_runner_test.yml +++ b/.github/workflows/python_hosted_runner_test.yml @@ -74,6 +74,8 @@ jobs: pyspark-core, pyspark-errors, pyspark-streaming - >- pyspark-mllib, pyspark-ml, pyspark-ml-connect + - >- + pyspark-structured-streaming, pyspark-structured-streaming-connect - >- pyspark-connect - >- @@ -81,13 +83,9 @@ jobs: - >- pyspark-pandas-slow - >- - pyspark-pandas-connect-part0 - - >- - pyspark-pandas-connect-part1 - - >- - pyspark-pandas-connect-part2 + pyspark-pandas-connect - >- - pyspark-pandas-connect-part3 + pyspark-pandas-slow-connect env: MODULES_TO_TEST: ${{ matrix.modules }} PYTHON_TO_TEST: python${{inputs.python}} @@ -149,8 +147,8 @@ jobs: run: | python${{matrix.python}} -m pip install --ignore-installed 'blinker>=1.6.2' python${{matrix.python}} -m pip install --ignore-installed 'six==1.16.0' - python${{matrix.python}} -m pip install numpy 'pyarrow>=19.0.0' 'six==1.16.0' 'pandas==2.2.3' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \ - python${{matrix.python}} -m pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.1' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' && \ + python${{matrix.python}} -m pip install numpy 'pyarrow>=21.0.0' 'six==1.16.0' 'pandas==2.3.2' scipy 'plotly<6.0.0' 'mlflow>=2.8.1' coverage matplotlib openpyxl 
'memory-profiler>=0.61.0' 'scikit-learn>=1.3.2' unittest-xml-reporting && \ + python${{matrix.python}} -m pip install 'grpcio==1.67.0' 'grpcio-status==1.67.0' 'protobuf==5.29.5' 'googleapis-common-protos==1.65.0' 'graphviz==0.20.3' && \ python${{matrix.python}} -m pip cache purge - name: List Python packages run: python${{matrix.python}} -m pip list diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 976aaf616295c..5de61c831cbef 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -74,7 +74,8 @@ on: inputs: branch: description: 'Branch to release. Leave it empty to launch a dryrun. Dispatch this workflow only in the forked repository.' - required: false + required: true + default: master release-version: description: 'Release version. Leave it empty to launch a dryrun.' required: false @@ -90,16 +91,34 @@ jobs: release: name: Release Apache Spark runs-on: ubuntu-latest - # Do not allow dispatching this workflow manually in the main repo. - # and skip this workflow in forked repository when running as a - # scheduled job (dryrun). - if: ${{ (github.repository == 'apache/spark') != (inputs.branch != '' && inputs.release-version != '') }} + # Allow workflow to run only in the following cases: + # 1. In the apache/spark repository: + # - Only allow dry runs (i.e., both 'branch' and 'release-version' inputs are empty). + # 2. In forked repositories: + # - Allow real runs when both 'branch' and 'release-version' are provided. + # - Allow dry runs only if manually dispatched (not on a schedule). + if: | + ( + github.repository == 'apache/spark' && + inputs.branch == '' && + inputs.release-version == '' + ) || ( + github.repository != 'apache/spark' && + ( + (inputs.branch != '' && inputs.release-version != '') || github.event_name == 'workflow_dispatch' + ) + ) steps: - name: Checkout Spark repository uses: actions/checkout@v4 with: repository: apache/spark ref: "${{ inputs.branch }}" + - name: Free up disk space + run: | + if [ -f ./dev/free_disk_space ]; then + ./dev/free_disk_space + fi - name: Release Apache Spark env: GIT_BRANCH: "${{ inputs.branch }}" @@ -132,22 +151,12 @@ jobs: sleep 60 fi - empty_count=0 - non_empty_count=0 - for val in "$GIT_BRANCH" "$RELEASE_VERSION" "$SPARK_RC_COUNT"; do - if [ -z "$val" ]; then - empty_count=$((empty_count+1)) - else - non_empty_count=$((non_empty_count+1)) - fi - done - - if [ "$empty_count" -gt 0 ] && [ "$non_empty_count" -gt 0 ]; then - echo "Error: Either provide all inputs or leave them all empty for a dryrun." + if { [ -n "$RELEASE_VERSION" ] && [ -z "$SPARK_RC_COUNT" ]; } || { [ -z "$RELEASE_VERSION" ] && [ -n "$SPARK_RC_COUNT" ]; }; then + echo "Error: Either provide both 'Release version' and 'RC number', or leave both empty for a dryrun." 
exit 1 fi - if [ "$empty_count" -eq 3 ]; then + if [ -z "$RELEASE_VERSION" ] && [ -z "$SPARK_RC_COUNT" ]; then echo "Dry run mode enabled" export DRYRUN_MODE=1 ASF_PASSWORD="not_used" @@ -155,7 +164,6 @@ jobs: GPG_PASSPHRASE="not_used" ASF_USERNAME="gurwls223" export SKIP_TAG=1 - unset GIT_BRANCH unset RELEASE_VERSION else echo "Full release mode enabled" @@ -163,7 +171,7 @@ jobs: fi export ASF_PASSWORD GPG_PRIVATE_KEY GPG_PASSPHRASE ASF_USERNAME - [ -n "$GIT_BRANCH" ] && export GIT_BRANCH + export GIT_BRANCH="${GIT_BRANCH:-master}" [ -n "$RELEASE_VERSION" ] && export RELEASE_VERSION if [ "$DRYRUN_MODE" = "1" ]; then @@ -237,9 +245,18 @@ jobs: cp "$file" "$file.bak" for pattern in "${PATTERNS[@]}"; do [ -n "$pattern" ] || continue # Skip empty patterns - escaped_pattern=$(printf '%s\n' "$pattern" | sed 's/[\/&]/\\&/g') - sed -i "s/${escaped_pattern}/***/g" "$file" + + # Safely escape special characters for sed + escaped_pattern=${pattern//\\/\\\\} # Escape backslashes + escaped_pattern=${escaped_pattern//\//\\/} # Escape forward slashes + escaped_pattern=${escaped_pattern//&/\\&} # Escape & + escaped_pattern=${escaped_pattern//$'\n'/} # Remove newlines + escaped_pattern=${escaped_pattern//$'\r'/} # Remove carriage returns (optional) + + # Redact the pattern + sed -i.bak "s/${escaped_pattern}/***/g" "$file" done + rm -f "$file.bak" done # Zip logs/output diff --git a/.gitignore b/.gitignore index b6a1e63c41920..bbf02496498c1 100644 --- a/.gitignore +++ b/.gitignore @@ -125,3 +125,7 @@ sql/api/gen/ sql/api/src/main/gen/ sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/SqlBaseLexer.tokens sql/api/src/main/antlr4/org/apache/spark/sql/catalyst/parser/gen/ + +tpcds-sf-1/ +tpcds-sf-1-text/ +tpcds-kit/ diff --git a/connect-examples/server-library-example/client/src/main/resources/log4j2.xml b/.mvn/extensions.xml similarity index 72% rename from connect-examples/server-library-example/client/src/main/resources/log4j2.xml rename to .mvn/extensions.xml index 21b0d9719193e..da2aee8827a54 100644 --- a/connect-examples/server-library-example/client/src/main/resources/log4j2.xml +++ b/.mvn/extensions.xml @@ -15,16 +15,10 @@ ~ See the License for the specific language governing permissions and ~ limitations under the License. 
--> - - - - - - - - - - - - - + + + eu.maveniverse.maven.nisse + extension + 0.4.6 + + diff --git a/.mvn/jvm.config b/.mvn/jvm.config index 81b88d8173419..b41dca0e56a6b 100644 --- a/.mvn/jvm.config +++ b/.mvn/jvm.config @@ -1 +1,4 @@ +-XX:+IgnoreUnrecognizedVMOptions +-XX:+UnlockDiagnosticVMOptions +-XX:GCLockerRetryAllocationCount=100 --enable-native-access=ALL-UNNAMED diff --git a/.mvn/maven.config b/.mvn/maven.config new file mode 100644 index 0000000000000..e61f1a94abdef --- /dev/null +++ b/.mvn/maven.config @@ -0,0 +1 @@ +-Dnisse.compat.osDetector diff --git a/LICENSE b/LICENSE index 9b1e96a44a58c..ef8192ec38d05 100644 --- a/LICENSE +++ b/LICENSE @@ -217,6 +217,7 @@ core/src/main/resources/org/apache/spark/ui/static/vis* connector/spark-ganglia-lgpl/src/main/java/com/codahale/metrics/ganglia/GangliaReporter.java core/src/main/resources/org/apache/spark/ui/static/d3-flamegraph.min.js core/src/main/resources/org/apache/spark/ui/static/d3-flamegraph.css +mllib-local/src/main/scala/scala/collection/compat/package.scala Python Software Foundation License ---------------------------------- diff --git a/LICENSE-binary b/LICENSE-binary index 0c3c7aecb71ac..6ce7249e02e3b 100644 --- a/LICENSE-binary +++ b/LICENSE-binary @@ -365,8 +365,6 @@ org.apache.xbean:xbean-asm9-shaded org.apache.yetus:audience-annotations org.apache.zookeeper:zookeeper org.apache.zookeeper:zookeeper-jute -org.codehaus.jackson:jackson-core-asl -org.codehaus.jackson:jackson-mapper-asl org.datanucleus:datanucleus-api-jdo org.datanucleus:datanucleus-core org.datanucleus:datanucleus-rdbms @@ -388,7 +386,6 @@ org.glassfish.jersey.core:jersey-common org.glassfish.jersey.core:jersey-server org.glassfish.jersey.inject:jersey-hk2 org.javassist:javassist -org.jetbrains:annotations org.json4s:json4s-ast_2.13 org.json4s:json4s-core_2.13 org.json4s:json4s-jackson-core_2.13 @@ -401,7 +398,6 @@ org.rocksdb:rocksdbjni org.scala-lang:scala-compiler org.scala-lang:scala-library org.scala-lang:scala-reflect -org.scala-lang.modules:scala-collection-compat_2.13 org.scala-lang.modules:scala-parallel-collections_2.13 org.scala-lang.modules:scala-parser-combinators_2.13 org.scala-lang.modules:scala-xml_2.13 @@ -442,7 +438,6 @@ com.github.luben:zstd-jni com.github.wendykierp:JTransforms javolution:javolution jline:jline -org.jodd:jodd-core pl.edu.icm:JLargeArrays python/pyspark/errors/exceptions/tblib.py @@ -465,6 +460,7 @@ org.codehaus.janino:janino org.fusesource.leveldbjni:leveldbjni-all org.jline:jline org.jpmml:pmml-model +org.locationtech.jts:jts-core org.threeten:threeten-extra python/lib/py4j-*-src.zip @@ -483,7 +479,6 @@ dev.ludovic.netlib:blas dev.ludovic.netlib:arpack dev.ludovic.netlib:lapack net.razorvine:pickle -org.bouncycastle:bcprov-jdk18on org.checkerframework:checker-qual org.typelevel:algebra_2.13:jar org.typelevel:cats-kernel_2.13 diff --git a/README.md b/README.md index 0f0bf039550d7..65dfd67ac520e 100644 --- a/README.md +++ b/README.md @@ -32,11 +32,10 @@ This README file only contains basic setup instructions. 
| | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_rockdb_as_ui_backend.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_rockdb_as_ui_backend.yml) | | | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_maven.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_maven.yml) | | | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_maven_java21.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_maven_java21.yml) | -| | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_maven_java21_macos15.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_maven_java21_macos15.yml) | +| | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_maven_java21_macos26.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_maven_java21_macos26.yml) | | | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_maven_java21_arm.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_maven_java21_arm.yml) | -| | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_coverage.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_coverage.yml) | +| | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_coverage.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_coverage.yml) | | | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_pypy3.10.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_pypy3.10.yml) | -| | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_3.9.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.9.yml) | | | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_3.10.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.10.yml) | | | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_3.11_classic_only.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.11_classic_only.yml) | | | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_3.11_arm.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.11_arm.yml) | @@ -45,6 +44,7 @@ This README file only contains basic setup instructions.
| | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_3.12.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.12.yml) | | | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_3.13.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.13.yml) | | | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_3.13_nogil.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.13_nogil.yml) | +| | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_3.14.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_3.14.yml) | | | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_minimum.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_minimum.yml) | | | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_ps_minimum.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_ps_minimum.yml) | | | [![GitHub Actions Build](https://github.com/apache/spark/actions/workflows/build_python_connect35.yml/badge.svg)](https://github.com/apache/spark/actions/workflows/build_python_connect35.yml) | diff --git a/assembly/pom.xml b/assembly/pom.xml index a85ac5d9bc837..0e6012062313e 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -125,6 +125,18 @@ provided + + org.apache.spark + spark-connect-client-jdbc_${scala.binary.version} + ${project.version} + + + org.apache.spark + spark-connect-shims_${scala.binary.version} + + + provided + - - org.bouncycastle - bcprov-jdk18on - ${hadoop.deps.scope} - @@ -221,6 +223,20 @@ + + copy-connect-client-jdbc-jar + package + + exec + + + cp + + ${basedir}/../sql/connect/client/jdbc/target/spark-connect-client-jdbc_${scala.binary.version}-${project.version}.jar + ${basedir}/target/scala-${scala.binary.version}/jars/connect-repl + + + @@ -306,13 +322,6 @@ provided provided - provided - - - - hive-jackson-provided - - provided diff --git a/bin/load-spark-env.sh b/bin/load-spark-env.sh index 4ab35ad28751e..8db58ad387e81 100644 --- a/bin/load-spark-env.sh +++ b/bin/load-spark-env.sh @@ -65,6 +65,6 @@ export SPARK_SCALA_VERSION=2.13 #fi # Append jline option to enable the Beeline process to run in background. -if [ -e /usr/bin/tty -a "`tty`" != "not a tty" -a ! -p /dev/stdin ]; then +if [[ ( ! $(ps -o stat= -p $$ 2>/dev/null) =~ "+" ) && ! 
( -p /dev/stdin ) ]]; then export SPARK_BEELINE_OPTS="$SPARK_BEELINE_OPTS -Djline.terminal=jline.UnsupportedTerminal" fi diff --git a/bin/spark-pipelines b/bin/spark-pipelines index 52baeeafab08a..16ec90e3a1aec 100755 --- a/bin/spark-pipelines +++ b/bin/spark-pipelines @@ -30,4 +30,11 @@ fi export PYTHONPATH="${SPARK_HOME}/python/:$PYTHONPATH" export PYTHONPATH="${SPARK_HOME}/python/lib/py4j-0.10.9.9-src.zip:$PYTHONPATH" -$PYSPARK_PYTHON "${SPARK_HOME}"/python/pyspark/pipelines/cli.py "$@" +SDP_CLI_PY_FILE_PATH=$("${PYSPARK_PYTHON}" - <<'EOF' +import pyspark, os +from pathlib import Path +print(Path(os.path.dirname(pyspark.__file__)) / "pipelines" / "cli.py") +EOF +) + +exec "${SPARK_HOME}"/bin/spark-class org.apache.spark.deploy.SparkPipelines "$SDP_CLI_PY_FILE_PATH" "$@" diff --git a/build/sbt b/build/sbt index db9d3b345ff6f..fe446fd813fcf 100755 --- a/build/sbt +++ b/build/sbt @@ -36,7 +36,7 @@ fi declare -r noshare_opts="-Dsbt.global.base=project/.sbtboot -Dsbt.boot.directory=project/.boot -Dsbt.ivy.home=project/.ivy" declare -r sbt_opts_file=".sbtopts" declare -r etc_sbt_opts_file="/etc/sbt/sbtopts" -declare -r default_sbt_opts="-Xss64m" +declare -r default_sbt_opts="-Xss64m -XX:+IgnoreUnrecognizedVMOptions -XX:+UnlockDiagnosticVMOptions -XX:GCLockerRetryAllocationCount=100 --enable-native-access=ALL-UNNAMED" usage() { cat <slf4j-api - - commons-io - commons-io - test - - - org.apache.commons - commons-lang3 - test - - org.apache.logging.log4j log4j-api diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/ArrayWrappers.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/ArrayWrappers.java index 5265881e990e9..a9d6784805f6d 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/ArrayWrappers.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/ArrayWrappers.java @@ -19,7 +19,7 @@ import java.util.Arrays; -import com.google.common.base.Preconditions; +import org.apache.spark.network.util.JavaUtils; /** * A factory for array wrappers so that arrays can be used as keys in a map, sorted or not. 
@@ -38,7 +38,7 @@ class ArrayWrappers { @SuppressWarnings("unchecked") public static Comparable forArray(Object a) { - Preconditions.checkArgument(a.getClass().isArray()); + JavaUtils.checkArgument(a.getClass().isArray(), "Input should be an array"); Comparable ret; if (a instanceof int[] ia) { ret = new ComparableIntArray(ia); @@ -47,7 +47,8 @@ public static Comparable forArray(Object a) { } else if (a instanceof byte[] ba) { ret = new ComparableByteArray(ba); } else { - Preconditions.checkArgument(!a.getClass().getComponentType().isPrimitive()); + JavaUtils.checkArgument(!a.getClass().getComponentType().isPrimitive(), + "Array element is primitive"); ret = new ComparableObjectArray((Object[]) a); } return (Comparable) ret; diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java index a353a53d4b8d7..9a45a10532dee 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/InMemoryStore.java @@ -24,6 +24,7 @@ import java.util.HashSet; import java.util.List; import java.util.NoSuchElementException; +import java.util.Objects; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.function.BiConsumer; @@ -31,10 +32,8 @@ import java.util.stream.Collectors; import java.util.stream.Stream; -import com.google.common.base.Objects; -import com.google.common.base.Preconditions; - import org.apache.spark.annotation.Private; +import org.apache.spark.network.util.JavaUtils; /** * Implementation of KVStore that keeps data deserialized in memory. This store does not index @@ -70,7 +69,7 @@ public long count(Class type, String index, Object indexedValue) throws Excep Object comparable = asKey(indexedValue); KVTypeInfo.Accessor accessor = list.getIndexAccessor(index); for (Object o : view(type)) { - if (Objects.equal(comparable, asKey(accessor.get(o)))) { + if (Objects.equals(comparable, asKey(accessor.get(o)))) { count++; } } @@ -419,7 +418,7 @@ private List copyElements() { // Go through all the values in `data` and collect all the objects has certain parent // value. This can be slow when there is a large number of entries in `data`. KVTypeInfo.Accessor parentGetter = ti.getParentAccessor(index); - Preconditions.checkArgument(parentGetter != null, "Parent filter for non-child index."); + JavaUtils.checkArgument(parentGetter != null, "Parent filter for non-child index."); return data.values().stream() .filter(e -> compare(e, parentGetter, parentKey) == 0) .collect(Collectors.toList()); diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVStoreView.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVStoreView.java index 90135268fdef7..cfdcc1a2c8789 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVStoreView.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVStoreView.java @@ -17,9 +17,10 @@ package org.apache.spark.util.kvstore; -import com.google.common.base.Preconditions; +import java.util.Objects; import org.apache.spark.annotation.Private; +import org.apache.spark.network.util.JavaUtils; /** * A configurable view that allows iterating over values in a {@link KVStore}. @@ -58,7 +59,7 @@ public KVStoreView reverse() { * Iterates according to the given index. 
*/ public KVStoreView index(String name) { - this.index = Preconditions.checkNotNull(name); + this.index = Objects.requireNonNull(name); return this; } @@ -96,7 +97,7 @@ public KVStoreView last(Object value) { * Stops iteration after a number of elements has been retrieved. */ public KVStoreView max(long max) { - Preconditions.checkArgument(max > 0L, "max must be positive."); + JavaUtils.checkArgument(max > 0L, "max must be positive."); this.max = max; return this; } diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVTypeInfo.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVTypeInfo.java index bf7c256fc94ff..86f32abc9075d 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVTypeInfo.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/KVTypeInfo.java @@ -23,9 +23,8 @@ import java.util.Map; import java.util.stream.Stream; -import com.google.common.base.Preconditions; - import org.apache.spark.annotation.Private; +import org.apache.spark.network.util.JavaUtils; /** * Wrapper around types managed in a KVStore, providing easy access to their indexed fields. @@ -56,7 +55,7 @@ public KVTypeInfo(Class type) { KVIndex idx = m.getAnnotation(KVIndex.class); if (idx != null) { checkIndex(idx, indices); - Preconditions.checkArgument(m.getParameterCount() == 0, + JavaUtils.checkArgument(m.getParameterCount() == 0, "Annotated method %s::%s should not have any parameters.", type.getName(), m.getName()); m.setAccessible(true); indices.put(idx.value(), idx); @@ -64,29 +63,29 @@ public KVTypeInfo(Class type) { } } - Preconditions.checkArgument(indices.containsKey(KVIndex.NATURAL_INDEX_NAME), + JavaUtils.checkArgument(indices.containsKey(KVIndex.NATURAL_INDEX_NAME), "No natural index defined for type %s.", type.getName()); for (KVIndex idx : indices.values()) { if (!idx.parent().isEmpty()) { KVIndex parent = indices.get(idx.parent()); - Preconditions.checkArgument(parent != null, + JavaUtils.checkArgument(parent != null, "Cannot find parent %s of index %s.", idx.parent(), idx.value()); - Preconditions.checkArgument(parent.parent().isEmpty(), + JavaUtils.checkArgument(parent.parent().isEmpty(), "Parent index %s of index %s cannot be itself a child index.", idx.parent(), idx.value()); } } } private void checkIndex(KVIndex idx, Map indices) { - Preconditions.checkArgument(idx.value() != null && !idx.value().isEmpty(), + JavaUtils.checkArgument(idx.value() != null && !idx.value().isEmpty(), "No name provided for index in type %s.", type.getName()); - Preconditions.checkArgument( + JavaUtils.checkArgument( !idx.value().startsWith("_") || idx.value().equals(KVIndex.NATURAL_INDEX_NAME), "Index name %s (in type %s) is not allowed.", idx.value(), type.getName()); - Preconditions.checkArgument(idx.parent().isEmpty() || !idx.parent().equals(idx.value()), + JavaUtils.checkArgument(idx.parent().isEmpty() || !idx.parent().equals(idx.value()), "Index %s cannot be parent of itself.", idx.value()); - Preconditions.checkArgument(!indices.containsKey(idx.value()), + JavaUtils.checkArgument(!indices.containsKey(idx.value()), "Duplicate index %s for type %s.", idx.value(), type.getName()); } @@ -104,7 +103,7 @@ public Stream indices() { Accessor getAccessor(String indexName) { Accessor a = accessors.get(indexName); - Preconditions.checkArgument(a != null, "No index %s.", indexName); + JavaUtils.checkArgument(a != null, "No index %s.", indexName); return a; } diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDB.java 
b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDB.java index 74843806b3ea0..91b2cde2d84fe 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDB.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDB.java @@ -30,8 +30,6 @@ import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.base.Throwables; import org.fusesource.leveldbjni.JniDBFactory; import org.iq80.leveldb.DB; import org.iq80.leveldb.DBIterator; @@ -39,6 +37,7 @@ import org.iq80.leveldb.WriteBatch; import org.apache.spark.annotation.Private; +import org.apache.spark.network.util.JavaUtils; /** * Implementation of KVStore that uses LevelDB as the underlying data store. @@ -137,20 +136,20 @@ T get(byte[] key, Class klass) throws Exception { } private void put(byte[] key, Object value) throws Exception { - Preconditions.checkArgument(value != null, "Null values are not allowed."); + JavaUtils.checkArgument(value != null, "Null values are not allowed."); db().put(key, serializer.serialize(value)); } @Override public T read(Class klass, Object naturalKey) throws Exception { - Preconditions.checkArgument(naturalKey != null, "Null keys are not allowed."); + JavaUtils.checkArgument(naturalKey != null, "Null keys are not allowed."); byte[] key = getTypeInfo(klass).naturalIndex().start(null, naturalKey); return get(key, klass); } @Override public void write(Object value) throws Exception { - Preconditions.checkArgument(value != null, "Null values are not allowed."); + JavaUtils.checkArgument(value != null, "Null values are not allowed."); LevelDBTypeInfo ti = getTypeInfo(value.getClass()); try (WriteBatch batch = db().createWriteBatch()) { @@ -163,7 +162,7 @@ public void write(Object value) throws Exception { } public void writeAll(List values) throws Exception { - Preconditions.checkArgument(values != null && !values.isEmpty(), + JavaUtils.checkArgument(values != null && !values.isEmpty(), "Non-empty values required."); // Group by class, in case there are values from different classes in the values @@ -225,7 +224,7 @@ private void updateBatch( @Override public void delete(Class type, Object naturalKey) throws Exception { - Preconditions.checkArgument(naturalKey != null, "Null keys are not allowed."); + JavaUtils.checkArgument(naturalKey != null, "Null keys are not allowed."); try (WriteBatch batch = db().createWriteBatch()) { LevelDBTypeInfo ti = getTypeInfo(type); byte[] key = ti.naturalIndex().start(null, naturalKey); @@ -256,7 +255,7 @@ public Iterator iterator() { iteratorTracker.add(new WeakReference<>(it)); return it; } catch (Exception e) { - Throwables.throwIfUnchecked(e); + if (e instanceof RuntimeException re) throw re; throw new RuntimeException(e); } } diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java index 29ed37ffa44e5..d80e002ddb06e 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBIterator.java @@ -26,12 +26,11 @@ import java.util.concurrent.atomic.AtomicBoolean; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.base.Throwables; import org.iq80.leveldb.DBIterator; import org.apache.spark.internal.SparkLogger; import 
org.apache.spark.internal.SparkLoggerFactory; +import org.apache.spark.network.util.JavaUtils; class LevelDBIterator implements KVStoreIterator { @@ -66,7 +65,7 @@ class LevelDBIterator implements KVStoreIterator { this.resourceCleaner = new ResourceCleaner(it, db); this.cleanable = CLEANER.register(this, this.resourceCleaner); - Preconditions.checkArgument(!index.isChild() || params.parent != null, + JavaUtils.checkArgument(!index.isChild() || params.parent != null, "Cannot iterate over child index %s without parent value.", params.index); byte[] parent = index.isChild() ? index.parent().childPrefix(params.parent) : null; @@ -151,7 +150,7 @@ public T next() { next = null; return ret; } catch (Exception e) { - Throwables.throwIfUnchecked(e); + if (e instanceof RuntimeException re) throw re; throw new RuntimeException(e); } } diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBTypeInfo.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBTypeInfo.java index 21a412a36f39b..341e34606a9b2 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBTypeInfo.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/LevelDBTypeInfo.java @@ -18,14 +18,17 @@ package org.apache.spark.util.kvstore; import java.lang.reflect.Array; +import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.Map; +import java.util.Objects; import static java.nio.charset.StandardCharsets.UTF_8; -import com.google.common.base.Preconditions; import org.iq80.leveldb.WriteBatch; +import org.apache.spark.network.util.JavaUtils; + /** * Holds metadata about app-specific types stored in LevelDB. Serves as a cache for data collected * via reflection, to make it cheaper to access it multiple times. @@ -162,7 +165,7 @@ Index naturalIndex() { Index index(String name) { Index i = indices.get(name); - Preconditions.checkArgument(i != null, "Index %s does not exist for type %s.", name, + JavaUtils.checkArgument(i != null, "Index %s does not exist for type %s.", name, type.getName()); return i; } @@ -251,7 +254,7 @@ Index parent() { * same parent index exist. */ byte[] childPrefix(Object value) { - Preconditions.checkState(parent == null, "Not a parent index."); + JavaUtils.checkState(parent == null, "Not a parent index."); return buildKey(name, toParentKey(value)); } @@ -266,9 +269,9 @@ Object getValue(Object entity) throws Exception { private void checkParent(byte[] prefix) { if (prefix != null) { - Preconditions.checkState(parent != null, "Parent prefix provided for parent index."); + JavaUtils.checkState(parent != null, "Parent prefix provided for parent index."); } else { - Preconditions.checkState(parent == null, "Parent prefix missing for child index."); + JavaUtils.checkState(parent == null, "Parent prefix missing for child index."); } } @@ -305,8 +308,9 @@ byte[] end(byte[] prefix, Object value) { /** The full key in the index that identifies the given entity. 
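
The LevelDB and LevelDBIterator hunks above also replace Guava's Throwables.throwIfUnchecked with Java 16+ pattern-matching instanceof. Because the surrounding catch parameter is an Exception (an Error can never reach it), rethrowing RuntimeException as-is and wrapping everything else is behaviorally equivalent here. A small sketch of that idiom:

    import java.util.concurrent.Callable;

    // Sketch of the rethrow-or-wrap idiom now used by the kvstore iterators:
    // unchecked exceptions propagate unchanged, checked ones are wrapped.
    final class RethrowSketch {
        static <T> T callUnchecked(Callable<T> body) {
            try {
                return body.call();
            } catch (Exception e) {
                if (e instanceof RuntimeException re) throw re;  // unchecked: propagate as-is
                throw new RuntimeException(e);                   // checked: wrap
            }
        }

        public static void main(String[] args) {
            System.out.println(callUnchecked(() -> 21 + 21));    // prints 42
        }
    }
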
*/ byte[] entityKey(byte[] prefix, Object entity) throws Exception { Object indexValue = getValue(entity); - Preconditions.checkNotNull(indexValue, "Null index value for %s in type %s.", - name, type.getName()); + Objects.requireNonNull(indexValue, () -> + String.format( + "Null index value for %s in type %s.", Arrays.toString(name), type.getName())); byte[] entityKey = start(prefix, indexValue); if (!isNatural) { entityKey = buildKey(false, entityKey, toKey(naturalIndex().getValue(entity))); @@ -331,8 +335,9 @@ private void addOrRemove( byte[] naturalKey, byte[] prefix) throws Exception { Object indexValue = getValue(entity); - Preconditions.checkNotNull(indexValue, "Null index value for %s in type %s.", - name, type.getName()); + Objects.requireNonNull(indexValue, () -> + String.format( + "Null index value for %s in type %s.", Arrays.toString(name), type.getName())); byte[] entityKey = start(prefix, indexValue); if (!isNatural) { diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDB.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDB.java index 8c9ac5a232001..4b69b9441dc32 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDB.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDB.java @@ -31,11 +31,10 @@ import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.base.Throwables; import org.rocksdb.*; import org.apache.spark.annotation.Private; +import org.apache.spark.network.util.JavaUtils; /** * Implementation of KVStore that uses RocksDB as the underlying data store. @@ -170,20 +169,20 @@ T get(byte[] key, Class klass) throws Exception { } private void put(byte[] key, Object value) throws Exception { - Preconditions.checkArgument(value != null, "Null values are not allowed."); + JavaUtils.checkArgument(value != null, "Null values are not allowed."); db().put(key, serializer.serialize(value)); } @Override public T read(Class klass, Object naturalKey) throws Exception { - Preconditions.checkArgument(naturalKey != null, "Null keys are not allowed."); + JavaUtils.checkArgument(naturalKey != null, "Null keys are not allowed."); byte[] key = getTypeInfo(klass).naturalIndex().start(null, naturalKey); return get(key, klass); } @Override public void write(Object value) throws Exception { - Preconditions.checkArgument(value != null, "Null values are not allowed."); + JavaUtils.checkArgument(value != null, "Null values are not allowed."); RocksDBTypeInfo ti = getTypeInfo(value.getClass()); byte[] data = serializer.serialize(value); synchronized (ti) { @@ -195,7 +194,7 @@ public void write(Object value) throws Exception { } public void writeAll(List values) throws Exception { - Preconditions.checkArgument(values != null && !values.isEmpty(), + JavaUtils.checkArgument(values != null && !values.isEmpty(), "Non-empty values required."); // Group by class, in case there are values from different classes in the values @@ -257,7 +256,7 @@ private void updateBatch( @Override public void delete(Class type, Object naturalKey) throws Exception { - Preconditions.checkArgument(naturalKey != null, "Null keys are not allowed."); + JavaUtils.checkArgument(naturalKey != null, "Null keys are not allowed."); try (WriteBatch writeBatch = new WriteBatch()) { RocksDBTypeInfo ti = getTypeInfo(type); byte[] key = ti.naturalIndex().start(null, naturalKey); @@ -288,7 +287,7 @@ public Iterator iterator() 
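
The entityKey/addOrRemove hunks above switch to the Objects.requireNonNull(T, Supplier<String>) overload, so the formatted message (including Arrays.toString on the byte[] index name) is only built when the value actually is null. A sketch of that call shape, with illustrative parameter names:

    import java.util.Arrays;
    import java.util.Objects;

    // Sketch: the Supplier runs only on the null path, so no String.format or
    // Arrays.toString cost is paid when the index value is present.
    final class LazyMessageSketch {
        static Object requireIndexValue(Object indexValue, byte[] name, Class<?> type) {
            return Objects.requireNonNull(indexValue, () ->
                String.format("Null index value for %s in type %s.",
                    Arrays.toString(name), type.getName()));
        }

        public static void main(String[] args) {
            Object ok = requireIndexValue("value", new byte[] {105, 100}, String.class);
            System.out.println(ok);
        }
    }
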
{ iteratorTracker.add(new WeakReference<>(it)); return it; } catch (Exception e) { - Throwables.throwIfUnchecked(e); + if (e instanceof RuntimeException re) throw re; throw new RuntimeException(e); } } diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDBIterator.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDBIterator.java index e350ddc2d445a..d37a4bd7b0b2d 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDBIterator.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDBIterator.java @@ -23,10 +23,10 @@ import java.util.concurrent.atomic.AtomicBoolean; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.base.Throwables; import org.rocksdb.RocksIterator; +import org.apache.spark.network.util.JavaUtils; + class RocksDBIterator implements KVStoreIterator { private static final Cleaner CLEANER = Cleaner.create(); @@ -58,7 +58,7 @@ class RocksDBIterator implements KVStoreIterator { this.resourceCleaner = new RocksDBIterator.ResourceCleaner(it, db); this.cleanable = CLEANER.register(this, resourceCleaner); - Preconditions.checkArgument(!index.isChild() || params.parent != null, + JavaUtils.checkArgument(!index.isChild() || params.parent != null, "Cannot iterate over child index %s without parent value.", params.index); byte[] parent = index.isChild() ? index.parent().childPrefix(params.parent) : null; @@ -137,7 +137,7 @@ public T next() { next = null; return ret; } catch (Exception e) { - Throwables.throwIfUnchecked(e); + if (e instanceof RuntimeException re) throw re; throw new RuntimeException(e); } } diff --git a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDBTypeInfo.java b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDBTypeInfo.java index 3d1ba72f94d77..3b325a56ff2ca 100644 --- a/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDBTypeInfo.java +++ b/common/kvstore/src/main/java/org/apache/spark/util/kvstore/RocksDBTypeInfo.java @@ -17,14 +17,17 @@ package org.apache.spark.util.kvstore; -import com.google.common.base.Preconditions; -import org.rocksdb.RocksDBException; -import org.rocksdb.WriteBatch; - import java.lang.reflect.Array; +import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.Map; +import java.util.Objects; + +import org.rocksdb.RocksDBException; +import org.rocksdb.WriteBatch; + +import org.apache.spark.network.util.JavaUtils; import static java.nio.charset.StandardCharsets.UTF_8; @@ -164,7 +167,7 @@ Index naturalIndex() { Index index(String name) { Index i = indices.get(name); - Preconditions.checkArgument(i != null, "Index %s does not exist for type %s.", name, + JavaUtils.checkArgument(i != null, "Index %s does not exist for type %s.", name, type.getName()); return i; } @@ -253,7 +256,7 @@ Index parent() { * same parent index exist. 
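
The type-info classes above migrate both Preconditions.checkArgument and Preconditions.checkState. Assuming the JavaUtils counterparts keep Guava's distinction, checkArgument rejects bad caller input with IllegalArgumentException while checkState rejects an object in the wrong state with IllegalStateException, as in the childPrefix/checkParent hunks. A minimal sketch of that assumed split (local stand-ins, not the real helpers):

    // Sketch only: assumed semantics of the two helpers, mirroring Guava.
    final class CheckStateSketch {
        static void checkArgument(boolean check, String message) {
            if (!check) throw new IllegalArgumentException(message);  // bad input
        }

        static void checkState(boolean check, String message) {
            if (!check) throw new IllegalStateException(message);     // bad object state
        }

        private final Object parent = null;   // stand-in for Index.parent

        byte[] childPrefix(Object value) {
            checkState(parent == null, "Not a parent index.");
            checkArgument(value != null, "Null values are not allowed.");
            return value.toString().getBytes();
        }

        public static void main(String[] args) {
            System.out.println(new CheckStateSketch().childPrefix("key").length);
        }
    }
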
*/ byte[] childPrefix(Object value) { - Preconditions.checkState(parent == null, "Not a parent index."); + JavaUtils.checkState(parent == null, "Not a parent index."); return buildKey(name, toParentKey(value)); } @@ -268,9 +271,9 @@ Object getValue(Object entity) throws Exception { private void checkParent(byte[] prefix) { if (prefix != null) { - Preconditions.checkState(parent != null, "Parent prefix provided for parent index."); + JavaUtils.checkState(parent != null, "Parent prefix provided for parent index."); } else { - Preconditions.checkState(parent == null, "Parent prefix missing for child index."); + JavaUtils.checkState(parent == null, "Parent prefix missing for child index."); } } @@ -307,8 +310,9 @@ byte[] end(byte[] prefix, Object value) { /** The full key in the index that identifies the given entity. */ byte[] entityKey(byte[] prefix, Object entity) throws Exception { Object indexValue = getValue(entity); - Preconditions.checkNotNull(indexValue, "Null index value for %s in type %s.", - name, type.getName()); + Objects.requireNonNull(indexValue, () -> + String.format( + "Null index value for %s in type %s.", Arrays.toString(name), type.getName())); byte[] entityKey = start(prefix, indexValue); if (!isNatural) { entityKey = buildKey(false, entityKey, toKey(naturalIndex().getValue(entity))); @@ -333,8 +337,9 @@ private void addOrRemove( byte[] naturalKey, byte[] prefix) throws Exception { Object indexValue = getValue(entity); - Preconditions.checkNotNull(indexValue, "Null index value for %s in type %s.", - name, type.getName()); + Objects.requireNonNull(indexValue, () -> + String.format( + "Null index value for %s in type %s.", Arrays.toString(name), type.getName())); byte[] entityKey = start(prefix, indexValue); if (!isNatural) { diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/DBIteratorSuite.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/DBIteratorSuite.java index 72c3690d1a187..dd99d4876763e 100644 --- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/DBIteratorSuite.java +++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/DBIteratorSuite.java @@ -499,7 +499,9 @@ private KVStoreView view() throws Exception { private List collect(KVStoreView view) throws Exception { try (KVStoreIterator iterator = view.closeableIterator()) { - return Lists.newArrayList(iterator); + List list = new ArrayList<>(); + iterator.forEachRemaining(list::add); + return list; } } diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/InMemoryStoreSuite.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/InMemoryStoreSuite.java index 81f1d21c49c24..c7ae03f078297 100644 --- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/InMemoryStoreSuite.java +++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/InMemoryStoreSuite.java @@ -17,9 +17,9 @@ package org.apache.spark.util.kvstore; +import java.util.Set; import java.util.NoSuchElementException; -import com.google.common.collect.ImmutableSet; import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.*; @@ -147,25 +147,25 @@ public void testRemoveAll() throws Exception { assertFalse(store.removeAllByIndexValues( ArrayKeyIndexType.class, KVIndex.NATURAL_INDEX_NAME, - ImmutableSet.of(new int[] {10, 10, 10}, new int[] { 3, 3, 3 }))); + Set.of(new int[] {10, 10, 10}, new int[] { 3, 3, 3 }))); assertEquals(9, store.count(ArrayKeyIndexType.class)); assertTrue(store.removeAllByIndexValues( ArrayKeyIndexType.class, 
KVIndex.NATURAL_INDEX_NAME, - ImmutableSet.of(new int[] {0, 0, 0}, new int[] { 2, 2, 2 }))); + Set.of(new int[] {0, 0, 0}, new int[] { 2, 2, 2 }))); assertEquals(7, store.count(ArrayKeyIndexType.class)); assertTrue(store.removeAllByIndexValues( ArrayKeyIndexType.class, "id", - ImmutableSet.of(new String [] { "things" }))); + Set.of(new String [] { "things" }))); assertEquals(4, store.count(ArrayKeyIndexType.class)); assertTrue(store.removeAllByIndexValues( ArrayKeyIndexType.class, "id", - ImmutableSet.of(new String [] { "more things" }))); + Set.of(new String [] { "more things" }))); assertEquals(0, store.count(ArrayKeyIndexType.class)); } diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBBenchmark.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBBenchmark.java index ff6db8fc34c96..a9bfea44c4f30 100644 --- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBBenchmark.java +++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBBenchmark.java @@ -28,7 +28,6 @@ import com.codahale.metrics.Slf4jReporter; import com.codahale.metrics.Snapshot; import com.codahale.metrics.Timer; -import org.apache.commons.io.FileUtils; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeEach; @@ -37,6 +36,9 @@ // checkstyle.off: RegexpSinglelineJava import org.slf4j.LoggerFactory; // checkstyle.on: RegexpSinglelineJava + +import org.apache.spark.network.util.JavaUtils; + import static org.junit.jupiter.api.Assertions.*; /** @@ -79,7 +81,7 @@ public void cleanup() throws Exception { } } if (dbpath != null) { - FileUtils.deleteQuietly(dbpath); + JavaUtils.deleteQuietly(dbpath); } } diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBIteratorSuite.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBIteratorSuite.java index 6ff6286654450..11afb9281d15b 100644 --- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBIteratorSuite.java +++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBIteratorSuite.java @@ -19,11 +19,12 @@ import java.io.File; -import org.apache.commons.io.FileUtils; -import org.apache.commons.lang3.SystemUtils; import org.junit.jupiter.api.AfterAll; import static org.junit.jupiter.api.Assumptions.assumeFalse; +import org.apache.spark.network.util.JavaUtils; +import org.apache.spark.util.SparkSystemUtils$; + public class LevelDBIteratorSuite extends DBIteratorSuite { private static File dbpath; @@ -35,13 +36,13 @@ public static void cleanup() throws Exception { db.close(); } if (dbpath != null) { - FileUtils.deleteQuietly(dbpath); + JavaUtils.deleteQuietly(dbpath); } } @Override protected KVStore createStore() throws Exception { - assumeFalse(SystemUtils.IS_OS_MAC_OSX && SystemUtils.OS_ARCH.equals("aarch64")); + assumeFalse(SparkSystemUtils$.MODULE$.isMacOnAppleSilicon()); dbpath = File.createTempFile("test.", ".ldb"); dbpath.delete(); db = new LevelDB(dbpath); diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBSuite.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBSuite.java index 040ccce70b5a1..25e6664d28dd1 100644 --- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBSuite.java +++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/LevelDBSuite.java @@ -25,18 +25,19 @@ import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; +import java.util.Set; import 
java.util.Spliterators; import java.util.stream.Collectors; import java.util.stream.StreamSupport; -import com.google.common.collect.ImmutableSet; -import org.apache.commons.io.FileUtils; -import org.apache.commons.lang3.SystemUtils; import org.iq80.leveldb.DBIterator; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; +import org.apache.spark.network.util.JavaUtils; +import org.apache.spark.util.SparkSystemUtils$; + import static org.junit.jupiter.api.Assertions.*; import static org.junit.jupiter.api.Assumptions.assumeFalse; @@ -51,13 +52,13 @@ public void cleanup() throws Exception { db.close(); } if (dbpath != null) { - FileUtils.deleteQuietly(dbpath); + JavaUtils.deleteQuietly(dbpath); } } @BeforeEach public void setup() throws Exception { - assumeFalse(SystemUtils.IS_OS_MAC_OSX && SystemUtils.OS_ARCH.equals("aarch64")); + assumeFalse(SparkSystemUtils$.MODULE$.isMacOnAppleSilicon()); dbpath = File.createTempFile("test.", ".ldb"); dbpath.delete(); db = new LevelDB(dbpath); @@ -219,19 +220,19 @@ public void testRemoveAll() throws Exception { db.removeAllByIndexValues( ArrayKeyIndexType.class, KVIndex.NATURAL_INDEX_NAME, - ImmutableSet.of(new int[] {0, 0, 0}, new int[] { 2, 2, 2 })); + Set.of(new int[] {0, 0, 0}, new int[] { 2, 2, 2 })); assertEquals(7, db.count(ArrayKeyIndexType.class)); db.removeAllByIndexValues( ArrayKeyIndexType.class, "id", - ImmutableSet.of(new String[] { "things" })); + Set.of(new String[] { "things" })); assertEquals(4, db.count(ArrayKeyIndexType.class)); db.removeAllByIndexValues( ArrayKeyIndexType.class, "id", - ImmutableSet.of(new String[] { "more things" })); + Set.of(new String[] { "more things" })); assertEquals(0, db.count(ArrayKeyIndexType.class)); } @@ -305,7 +306,7 @@ public void testCloseLevelDBIterator() throws Exception { } dbForCloseTest.close(); assertTrue(dbPathForCloseTest.exists()); - FileUtils.deleteQuietly(dbPathForCloseTest); + JavaUtils.deleteQuietly(dbPathForCloseTest); assertTrue(!dbPathForCloseTest.exists()); } @@ -419,7 +420,7 @@ public void testResourceCleaner() throws Exception { assertTrue(resourceCleaner.isCompleted()); } finally { dbForCleanerTest.close(); - FileUtils.deleteQuietly(dbPathForCleanerTest); + JavaUtils.deleteQuietly(dbPathForCleanerTest); } } diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBBenchmark.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBBenchmark.java deleted file mode 100644 index c1b8009e97e66..0000000000000 --- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBBenchmark.java +++ /dev/null @@ -1,287 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
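
The kvstore test suites above drop Guava's ImmutableSet.of and Lists.newArrayList(Iterator) in favor of JDK Set.of and a manual forEachRemaining copy. Set.of rejects null elements and duplicate arguments at creation time, which matches how these fixtures use it (distinct, non-null keys), so test behavior should be unchanged. A small sketch of both replacements:

    import java.util.ArrayList;
    import java.util.Iterator;
    import java.util.List;
    import java.util.Set;

    // Sketch of the JDK replacements used in the kvstore test suites.
    final class CollectionSketch {
        // was: Lists.newArrayList(iterator)
        static <T> List<T> collect(Iterator<T> iterator) {
            List<T> list = new ArrayList<>();
            iterator.forEachRemaining(list::add);
            return list;
        }

        public static void main(String[] args) {
            // was: ImmutableSet.of(...); Set.of also rejects nulls and duplicate arguments.
            Set<int[]> keys = Set.of(new int[] {0, 0, 0}, new int[] {2, 2, 2});
            System.out.println(collect(keys.iterator()).size());   // 2
        }
    }
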
- */ - -package org.apache.spark.util.kvstore; - -import java.io.File; -import java.util.ArrayList; -import java.util.Collections; -import java.util.List; -import java.util.Map; -import java.util.concurrent.atomic.AtomicInteger; - -import com.codahale.metrics.MetricRegistry; -import com.codahale.metrics.Slf4jReporter; -import com.codahale.metrics.Snapshot; -import com.codahale.metrics.Timer; -import org.apache.commons.io.FileUtils; -import org.junit.jupiter.api.AfterEach; -import org.junit.jupiter.api.AfterAll; -import org.junit.jupiter.api.BeforeEach; -import org.junit.jupiter.api.Disabled; -import org.junit.jupiter.api.Test; -// checkstyle.off: RegexpSinglelineJava -import org.slf4j.LoggerFactory; -// checkstyle.on: RegexpSinglelineJava -import static org.junit.jupiter.api.Assertions.*; - -/** - * A set of small benchmarks for the RocksDB implementation. - * - * The benchmarks are run over two different types (one with just a natural index, and one - * with a ref index), over a set of 2^20 elements, and the following tests are performed: - * - * - write (then update) elements in sequential natural key order - * - write (then update) elements in random natural key order - * - iterate over natural index, ascending and descending - * - iterate over ref index, ascending and descending - */ -@Disabled -public class RocksDBBenchmark { - - private static final int COUNT = 1024; - private static final AtomicInteger IDGEN = new AtomicInteger(); - private static final MetricRegistry metrics = new MetricRegistry(); - private static final Timer dbCreation = metrics.timer("dbCreation"); - private static final Timer dbClose = metrics.timer("dbClose"); - - private RocksDB db; - private File dbpath; - - @BeforeEach - public void setup() throws Exception { - dbpath = File.createTempFile("test.", ".rdb"); - dbpath.delete(); - try(Timer.Context ctx = dbCreation.time()) { - db = new RocksDB(dbpath); - } - } - - @AfterEach - public void cleanup() throws Exception { - if (db != null) { - try(Timer.Context ctx = dbClose.time()) { - db.close(); - } - } - if (dbpath != null) { - FileUtils.deleteQuietly(dbpath); - } - } - - @AfterAll - public static void report() { - if (metrics.getTimers().isEmpty()) { - return; - } - - int headingPrefix = 0; - for (Map.Entry e : metrics.getTimers().entrySet()) { - headingPrefix = Math.max(e.getKey().length(), headingPrefix); - } - headingPrefix += 4; - - StringBuilder heading = new StringBuilder(); - for (int i = 0; i < headingPrefix; i++) { - heading.append(" "); - } - heading.append("\tcount"); - heading.append("\tmean"); - heading.append("\tmin"); - heading.append("\tmax"); - heading.append("\t95th"); - System.out.println(heading); - - for (Map.Entry e : metrics.getTimers().entrySet()) { - StringBuilder row = new StringBuilder(); - row.append(e.getKey()); - for (int i = 0; i < headingPrefix - e.getKey().length(); i++) { - row.append(" "); - } - - Snapshot s = e.getValue().getSnapshot(); - row.append("\t").append(e.getValue().getCount()); - row.append("\t").append(toMs(s.getMean())); - row.append("\t").append(toMs(s.getMin())); - row.append("\t").append(toMs(s.getMax())); - row.append("\t").append(toMs(s.get95thPercentile())); - - System.out.println(row); - } - - Slf4jReporter.forRegistry(metrics).outputTo(LoggerFactory.getLogger(RocksDBBenchmark.class)) - .build().report(); - } - - private static String toMs(double nanos) { - return String.format("%.3f", nanos / 1000 / 1000); - } - - @Test - public void sequentialWritesNoIndex() throws Exception { - List entries = 
createSimpleType(); - writeAll(entries, "sequentialWritesNoIndex"); - writeAll(entries, "sequentialUpdatesNoIndex"); - deleteNoIndex(entries, "sequentialDeleteNoIndex"); - } - - @Test - public void randomWritesNoIndex() throws Exception { - List entries = createSimpleType(); - - Collections.shuffle(entries); - writeAll(entries, "randomWritesNoIndex"); - - Collections.shuffle(entries); - writeAll(entries, "randomUpdatesNoIndex"); - - Collections.shuffle(entries); - deleteNoIndex(entries, "randomDeletesNoIndex"); - } - - @Test - public void sequentialWritesIndexedType() throws Exception { - List entries = createIndexedType(); - writeAll(entries, "sequentialWritesIndexed"); - writeAll(entries, "sequentialUpdatesIndexed"); - deleteIndexed(entries, "sequentialDeleteIndexed"); - } - - @Test - public void randomWritesIndexedTypeAndIteration() throws Exception { - List entries = createIndexedType(); - - Collections.shuffle(entries); - writeAll(entries, "randomWritesIndexed"); - - Collections.shuffle(entries); - writeAll(entries, "randomUpdatesIndexed"); - - // Run iteration benchmarks here since we've gone through the trouble of writing all - // the data already. - KVStoreView view = db.view(IndexedType.class); - iterate(view, "naturalIndex"); - iterate(view.reverse(), "naturalIndexDescending"); - iterate(view.index("name"), "refIndex"); - iterate(view.index("name").reverse(), "refIndexDescending"); - - Collections.shuffle(entries); - deleteIndexed(entries, "randomDeleteIndexed"); - } - - private void iterate(KVStoreView view, String name) throws Exception { - Timer create = metrics.timer(name + "CreateIterator"); - Timer iter = metrics.timer(name + "Iteration"); - KVStoreIterator it = null; - { - // Create the iterator several times, just to have multiple data points. 
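
The test suites above (and the benchmark being removed here) also swap commons-io's FileUtils.deleteQuietly for JavaUtils.deleteQuietly in their cleanup paths. The sketch below shows what such a call is presumably doing, best-effort recursive deletion that never throws; this is an assumption about the helper's behavior, not its actual implementation in org.apache.spark.network.util.JavaUtils.

    import java.io.File;
    import java.io.IOException;
    import java.nio.file.Files;
    import java.util.Comparator;

    // Sketch only: an assumed equivalent of the deleteQuietly calls used for test cleanup.
    final class DeleteQuietlySketch {
        static void deleteQuietly(File path) {
            if (path == null || !path.exists()) {
                return;
            }
            try (var stream = Files.walk(path.toPath())) {
                stream.sorted(Comparator.reverseOrder())   // children before parents
                    .forEach(p -> p.toFile().delete());
            } catch (IOException | RuntimeException e) {
                // quiet: ignore failures during cleanup
            }
        }

        public static void main(String[] args) throws IOException {
            File tmp = File.createTempFile("test.", ".ldb");
            deleteQuietly(tmp);
            System.out.println(tmp.exists());   // false
        }
    }
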
- for (int i = 0; i < 1024; i++) { - if (it != null) { - it.close(); - } - try(Timer.Context ctx = create.time()) { - it = view.closeableIterator(); - } - } - } - try { - while (it.hasNext()) { - try (Timer.Context ctx = iter.time()) { - it.next(); - } - } - } finally { - if (it != null) { - it.close(); - } - } - } - - private void writeAll(List entries, String timerName) throws Exception { - Timer timer = newTimer(timerName); - for (Object o : entries) { - try(Timer.Context ctx = timer.time()) { - db.write(o); - } - } - } - - private void deleteNoIndex(List entries, String timerName) throws Exception { - Timer delete = newTimer(timerName); - for (SimpleType i : entries) { - try(Timer.Context ctx = delete.time()) { - db.delete(i.getClass(), i.key); - } - } - } - - private void deleteIndexed(List entries, String timerName) throws Exception { - Timer delete = newTimer(timerName); - for (IndexedType i : entries) { - try(Timer.Context ctx = delete.time()) { - db.delete(i.getClass(), i.key); - } - } - } - - private List createSimpleType() { - List entries = new ArrayList<>(); - for (int i = 0; i < COUNT; i++) { - SimpleType t = new SimpleType(); - t.key = IDGEN.getAndIncrement(); - t.name = "name" + (t.key % 1024); - entries.add(t); - } - return entries; - } - - private List createIndexedType() { - List entries = new ArrayList<>(); - for (int i = 0; i < COUNT; i++) { - IndexedType t = new IndexedType(); - t.key = IDGEN.getAndIncrement(); - t.name = "name" + (t.key % 1024); - entries.add(t); - } - return entries; - } - - private Timer newTimer(String name) { - assertNull(metrics.getTimers().get(name), "Timer already exists: " + name); - return metrics.timer(name); - } - - public static class SimpleType { - - @KVIndex - public int key; - - public String name; - - } - - public static class IndexedType { - - @KVIndex - public int key; - - @KVIndex("name") - public String name; - - } - -} diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBIteratorSuite.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBIteratorSuite.java index f8d79484fae1a..f53ec870d1a11 100644 --- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBIteratorSuite.java +++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBIteratorSuite.java @@ -19,9 +19,10 @@ import java.io.File; -import org.apache.commons.io.FileUtils; import org.junit.jupiter.api.AfterAll; +import org.apache.spark.network.util.JavaUtils; + public class RocksDBIteratorSuite extends DBIteratorSuite { private static File dbpath; @@ -33,7 +34,7 @@ public static void cleanup() throws Exception { db.close(); } if (dbpath != null) { - FileUtils.deleteQuietly(dbpath); + JavaUtils.deleteQuietly(dbpath); } } diff --git a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBSuite.java b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBSuite.java index 34a12d8fddec8..1334386fde74d 100644 --- a/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBSuite.java +++ b/common/kvstore/src/test/java/org/apache/spark/util/kvstore/RocksDBSuite.java @@ -25,17 +25,18 @@ import java.util.Iterator; import java.util.List; import java.util.NoSuchElementException; +import java.util.Set; import java.util.Spliterators; import java.util.stream.Collectors; import java.util.stream.StreamSupport; -import com.google.common.collect.ImmutableSet; -import org.apache.commons.io.FileUtils; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import 
org.junit.jupiter.api.Test; import org.rocksdb.RocksIterator; +import org.apache.spark.network.util.JavaUtils; + import static org.junit.jupiter.api.Assertions.*; public class RocksDBSuite { @@ -49,7 +50,7 @@ public void cleanup() throws Exception { db.close(); } if (dbpath != null) { - FileUtils.deleteQuietly(dbpath); + JavaUtils.deleteQuietly(dbpath); } } @@ -216,19 +217,19 @@ public void testRemoveAll() throws Exception { db.removeAllByIndexValues( ArrayKeyIndexType.class, KVIndex.NATURAL_INDEX_NAME, - ImmutableSet.of(new int[] {0, 0, 0}, new int[] { 2, 2, 2 })); + Set.of(new int[] {0, 0, 0}, new int[] { 2, 2, 2 })); assertEquals(7, db.count(ArrayKeyIndexType.class)); db.removeAllByIndexValues( ArrayKeyIndexType.class, "id", - ImmutableSet.of(new String[] { "things" })); + Set.of(new String[] { "things" })); assertEquals(4, db.count(ArrayKeyIndexType.class)); db.removeAllByIndexValues( ArrayKeyIndexType.class, "id", - ImmutableSet.of(new String[] { "more things" })); + Set.of(new String[] { "more things" })); assertEquals(0, db.count(ArrayKeyIndexType.class)); } @@ -302,7 +303,7 @@ public void testCloseRocksDBIterator() throws Exception { } dbForCloseTest.close(); assertTrue(dbPathForCloseTest.exists()); - FileUtils.deleteQuietly(dbPathForCloseTest); + JavaUtils.deleteQuietly(dbPathForCloseTest); assertTrue(!dbPathForCloseTest.exists()); } @@ -417,7 +418,7 @@ public void testResourceCleaner() throws Exception { assertTrue(resourceCleaner.isCompleted()); } finally { dbForCleanerTest.close(); - FileUtils.deleteQuietly(dbPathForCleanerTest); + JavaUtils.deleteQuietly(dbPathForCleanerTest); } } diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 2f2be88ac9dd1..51b782920e6d9 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -83,10 +83,6 @@ - - org.apache.commons - commons-lang3 - ${leveldbjni.group} leveldbjni-all @@ -174,7 +170,7 @@ org.apache.spark - spark-common-utils_${scala.binary.version} + spark-common-utils-java_${scala.binary.version} ${project.version} diff --git a/common/network-common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java b/common/network-common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java index dd7c2061ec95b..eed43a8d28d90 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java +++ b/common/network-common/src/main/java/org/apache/spark/network/buffer/FileSegmentManagedBuffer.java @@ -26,11 +26,8 @@ import java.nio.channels.FileChannel; import java.nio.file.StandardOpenOption; -import com.google.common.io.ByteStreams; import io.netty.channel.DefaultFileRegion; import io.netty.handler.stream.ChunkedStream; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.util.JavaUtils; import org.apache.spark.network.util.LimitedInputStream; @@ -100,7 +97,7 @@ public InputStream createInputStream() throws IOException { boolean shouldClose = true; try { is = new FileInputStream(file); - ByteStreams.skipFully(is, offset); + is.skipNBytes(offset); InputStream r = new LimitedInputStream(is, length); shouldClose = false; return r; @@ -152,10 +149,7 @@ public Object convertToNettyForSsl() throws IOException { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("file", file) - .append("offset", offset) - .append("length", length) - .toString(); + return 
"FileSegmentManagedBuffer[file=" + file + ",offset=" + offset + + ",length=" + length + "]"; } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/buffer/NettyManagedBuffer.java b/common/network-common/src/main/java/org/apache/spark/network/buffer/NettyManagedBuffer.java index a40cfc8bc04b1..e7b8bafa92f4b 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/buffer/NettyManagedBuffer.java +++ b/common/network-common/src/main/java/org/apache/spark/network/buffer/NettyManagedBuffer.java @@ -23,8 +23,6 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufInputStream; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; /** * A {@link ManagedBuffer} backed by a Netty {@link ByteBuf}. @@ -75,8 +73,6 @@ public Object convertToNettyForSsl() throws IOException { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("buf", buf) - .toString(); + return "NettyManagedBuffer[buf=" + buf + "]"; } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/buffer/NioManagedBuffer.java b/common/network-common/src/main/java/org/apache/spark/network/buffer/NioManagedBuffer.java index 6eb8d4e2c731c..d97f853c58f1f 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/buffer/NioManagedBuffer.java +++ b/common/network-common/src/main/java/org/apache/spark/network/buffer/NioManagedBuffer.java @@ -23,8 +23,6 @@ import io.netty.buffer.ByteBufInputStream; import io.netty.buffer.Unpooled; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; /** * A {@link ManagedBuffer} backed by {@link ByteBuffer}. @@ -73,9 +71,7 @@ public Object convertToNettyForSsl() throws IOException { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("buf", buf) - .toString(); + return "NioManagedBuffer[buf=" + buf + "]"; } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java index a9df47645d36f..f02f2c63ecd4c 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java +++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClient.java @@ -21,20 +21,17 @@ import java.io.IOException; import java.net.SocketAddress; import java.nio.ByteBuffer; +import java.util.Objects; import java.util.UUID; import java.util.concurrent.ExecutionException; import java.util.concurrent.TimeUnit; import javax.annotation.Nullable; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.base.Throwables; import com.google.common.util.concurrent.SettableFuture; import io.netty.channel.Channel; import io.netty.util.concurrent.Future; import io.netty.util.concurrent.GenericFutureListener; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.internal.SparkLogger; import org.apache.spark.internal.SparkLoggerFactory; @@ -43,6 +40,7 @@ import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.buffer.NioManagedBuffer; import org.apache.spark.network.protocol.*; +import org.apache.spark.network.util.JavaUtils; import static 
org.apache.spark.network.util.NettyUtils.getRemoteAddress; @@ -81,8 +79,8 @@ public class TransportClient implements Closeable { private volatile boolean timedOut; public TransportClient(Channel channel, TransportResponseHandler handler) { - this.channel = Preconditions.checkNotNull(channel); - this.handler = Preconditions.checkNotNull(handler); + this.channel = Objects.requireNonNull(channel); + this.handler = Objects.requireNonNull(handler); this.timedOut = false; } @@ -113,7 +111,7 @@ public String getClientId() { * Trying to set a different client ID after it's been set will result in an exception. */ public void setClientId(String id) { - Preconditions.checkState(clientId == null, "Client ID has already been set."); + JavaUtils.checkState(clientId == null, "Client ID has already been set."); this.clientId = id; } @@ -290,10 +288,9 @@ public void onFailure(Throwable e) { try { return result.get(timeoutMs, TimeUnit.MILLISECONDS); } catch (ExecutionException e) { - Throwables.throwIfUnchecked(e.getCause()); throw new RuntimeException(e.getCause()); } catch (Exception e) { - Throwables.throwIfUnchecked(e); + if (e instanceof RuntimeException re) throw re; throw new RuntimeException(e); } } @@ -338,11 +335,8 @@ public void close() { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("remoteAddress", channel.remoteAddress()) - .append("clientId", clientId) - .append("isActive", isActive()) - .toString(); + return "TransportClient[remoteAddress=" + channel.remoteAddress() + "clientId=" + clientId + + ",isActive=" + isActive() + "]"; } private static long requestId() { @@ -369,8 +363,8 @@ public void operationComplete(Future future) throws Exception { } } else { logger.error("Failed to send RPC {} to {}", future.cause(), - MDC.of(LogKeys.REQUEST_ID$.MODULE$, requestId), - MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel))); + MDC.of(LogKeys.REQUEST_ID, requestId), + MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel))); channel.close(); try { String errorMsg = String.format("Failed to send RPC %s to %s: %s", requestId, diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java index d64b8c8f838e9..2137b5f3136ef 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java +++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportClientFactory.java @@ -21,16 +21,15 @@ import java.io.IOException; import java.net.InetSocketAddress; import java.net.SocketAddress; +import java.util.ArrayList; import java.util.List; +import java.util.Objects; import java.util.Random; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicReference; import com.codahale.metrics.MetricSet; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.base.Throwables; -import com.google.common.collect.Lists; import io.netty.bootstrap.Bootstrap; import io.netty.buffer.PooledByteBufAllocator; import io.netty.channel.Channel; @@ -100,9 +99,9 @@ private static class ClientPool { public TransportClientFactory( TransportContext context, List clientBootstraps) { - this.context = Preconditions.checkNotNull(context); + this.context = Objects.requireNonNull(context); this.conf = context.getConf(); - this.clientBootstraps = 
Lists.newArrayList(Preconditions.checkNotNull(clientBootstraps)); + this.clientBootstraps = new ArrayList<>(Objects.requireNonNull(clientBootstraps)); this.connectionPool = new ConcurrentHashMap<>(); this.numConnectionsPerPeer = conf.numConnectionsPerPeer(); this.rand = new Random(); @@ -193,9 +192,9 @@ public TransportClient createClient(String remoteHost, int remotePort, boolean f final String resolvMsg = resolvedAddress.isUnresolved() ? "failed" : "succeed"; if (hostResolveTimeMs > 2000) { logger.warn("DNS resolution {} for {} took {} ms", - MDC.of(LogKeys.STATUS$.MODULE$, resolvMsg), - MDC.of(LogKeys.HOST_PORT$.MODULE$, resolvedAddress), - MDC.of(LogKeys.TIME$.MODULE$, hostResolveTimeMs)); + MDC.of(LogKeys.STATUS, resolvMsg), + MDC.of(LogKeys.HOST_PORT, resolvedAddress), + MDC.of(LogKeys.TIME, hostResolveTimeMs)); } else { logger.trace("DNS resolution {} for {} took {} ms", resolvMsg, resolvedAddress, hostResolveTimeMs); @@ -210,7 +209,7 @@ public TransportClient createClient(String remoteHost, int remotePort, boolean f return cachedClient; } else { logger.info("Found inactive connection to {}, creating a new one.", - MDC.of(LogKeys.HOST_PORT$.MODULE$, resolvedAddress)); + MDC.of(LogKeys.HOST_PORT, resolvedAddress)); } } // If this connection should fast fail when last connection failed in last fast fail time @@ -314,7 +313,7 @@ public void operationComplete(final Future handshakeFuture) { logger.debug("{} successfully completed TLS handshake to ", address); } else { logger.info("failed to complete TLS handshake to {}", handshakeFuture.cause(), - MDC.of(LogKeys.HOST_PORT$.MODULE$, address)); + MDC.of(LogKeys.HOST_PORT, address)); cf.channel().close(); } } @@ -340,17 +339,17 @@ public void operationComplete(final Future handshakeFuture) { } catch (Exception e) { // catch non-RuntimeExceptions too as bootstrap may be written in Scala long bootstrapTimeMs = (System.nanoTime() - preBootstrap) / 1000000; logger.error("Exception while bootstrapping client after {} ms", e, - MDC.of(LogKeys.BOOTSTRAP_TIME$.MODULE$, bootstrapTimeMs)); + MDC.of(LogKeys.BOOTSTRAP_TIME, bootstrapTimeMs)); client.close(); - Throwables.throwIfUnchecked(e); + if (e instanceof RuntimeException re) throw re; throw new RuntimeException(e); } long postBootstrap = System.nanoTime(); logger.info("Successfully created connection to {} after {} ms ({} ms spent in bootstraps)", - MDC.of(LogKeys.HOST_PORT$.MODULE$, address), - MDC.of(LogKeys.ELAPSED_TIME$.MODULE$, (postBootstrap - preConnect) / 1000000), - MDC.of(LogKeys.BOOTSTRAP_TIME$.MODULE$, (postBootstrap - preBootstrap) / 1000000)); + MDC.of(LogKeys.HOST_PORT, address), + MDC.of(LogKeys.ELAPSED_TIME, (postBootstrap - preConnect) / 1000000), + MDC.of(LogKeys.BOOTSTRAP_TIME, (postBootstrap - preBootstrap) / 1000000)); return client; } diff --git a/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java b/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java index be4cf4a58abeb..d27fa08d829bb 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java +++ b/common/network-common/src/main/java/org/apache/spark/network/client/TransportResponseHandler.java @@ -26,8 +26,6 @@ import com.google.common.annotations.VisibleForTesting; import io.netty.channel.Channel; -import org.apache.commons.lang3.tuple.ImmutablePair; -import org.apache.commons.lang3.tuple.Pair; import org.apache.spark.internal.SparkLogger; import 
org.apache.spark.internal.SparkLoggerFactory; @@ -45,6 +43,7 @@ import org.apache.spark.network.server.MessageHandler; import static org.apache.spark.network.util.NettyUtils.getRemoteAddress; import org.apache.spark.network.util.TransportFrameDecoder; +import org.apache.spark.util.Pair; /** * Handler that processes server responses, in response to requests issued from a @@ -96,7 +95,7 @@ public void removeRpcRequest(long requestId) { public void addStreamCallback(String streamId, StreamCallback callback) { updateTimeOfLastRequest(); - streamCallbacks.offer(ImmutablePair.of(streamId, callback)); + streamCallbacks.offer(Pair.of(streamId, callback)); } @VisibleForTesting @@ -125,7 +124,7 @@ private void failOutstandingRequests(Throwable cause) { } for (Pair entry : streamCallbacks) { try { - entry.getValue().onFailure(entry.getKey(), cause); + entry.getRight().onFailure(entry.getLeft(), cause); } catch (Exception e) { logger.warn("StreamCallback.onFailure throws exception", e); } @@ -146,8 +145,8 @@ public void channelInactive() { if (hasOutstandingRequests()) { String remoteAddress = getRemoteAddress(channel); logger.error("Still have {} requests outstanding when connection from {} is closed", - MDC.of(LogKeys.COUNT$.MODULE$, numOutstandingRequests()), - MDC.of(LogKeys.HOST_PORT$.MODULE$, remoteAddress)); + MDC.of(LogKeys.COUNT, numOutstandingRequests()), + MDC.of(LogKeys.HOST_PORT, remoteAddress)); failOutstandingRequests(new IOException("Connection from " + remoteAddress + " closed")); } } @@ -157,8 +156,8 @@ public void exceptionCaught(Throwable cause) { if (hasOutstandingRequests()) { String remoteAddress = getRemoteAddress(channel); logger.error("Still have {} requests outstanding when connection from {} is closed", - MDC.of(LogKeys.COUNT$.MODULE$, numOutstandingRequests()), - MDC.of(LogKeys.HOST_PORT$.MODULE$, remoteAddress)); + MDC.of(LogKeys.COUNT, numOutstandingRequests()), + MDC.of(LogKeys.HOST_PORT, remoteAddress)); failOutstandingRequests(cause); } } @@ -169,8 +168,8 @@ public void handle(ResponseMessage message) throws Exception { ChunkReceivedCallback listener = outstandingFetches.get(resp.streamChunkId); if (listener == null) { logger.warn("Ignoring response for block {} from {} since it is not outstanding", - MDC.of(LogKeys.STREAM_CHUNK_ID$.MODULE$, resp.streamChunkId), - MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel))); + MDC.of(LogKeys.STREAM_CHUNK_ID, resp.streamChunkId), + MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel))); resp.body().release(); } else { outstandingFetches.remove(resp.streamChunkId); @@ -181,9 +180,9 @@ public void handle(ResponseMessage message) throws Exception { ChunkReceivedCallback listener = outstandingFetches.get(resp.streamChunkId); if (listener == null) { logger.warn("Ignoring response for block {} from {} ({}) since it is not outstanding", - MDC.of(LogKeys.STREAM_CHUNK_ID$.MODULE$, resp.streamChunkId), - MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel)), - MDC.of(LogKeys.ERROR$.MODULE$, resp.errorString)); + MDC.of(LogKeys.STREAM_CHUNK_ID, resp.streamChunkId), + MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel)), + MDC.of(LogKeys.ERROR, resp.errorString)); } else { outstandingFetches.remove(resp.streamChunkId); listener.onFailure(resp.streamChunkId.chunkIndex(), new ChunkFetchFailureException( @@ -193,9 +192,9 @@ public void handle(ResponseMessage message) throws Exception { RpcResponseCallback listener = (RpcResponseCallback) outstandingRpcs.get(resp.requestId); if (listener == null) { logger.warn("Ignoring 
response for RPC {} from {} ({} bytes) since it is not outstanding", - MDC.of(LogKeys.REQUEST_ID$.MODULE$, resp.requestId), - MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel)), - MDC.of(LogKeys.RESPONSE_BODY_SIZE$.MODULE$, resp.body().size())); + MDC.of(LogKeys.REQUEST_ID, resp.requestId), + MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel)), + MDC.of(LogKeys.RESPONSE_BODY_SIZE, resp.body().size())); resp.body().release(); } else { outstandingRpcs.remove(resp.requestId); @@ -209,9 +208,9 @@ public void handle(ResponseMessage message) throws Exception { BaseResponseCallback listener = outstandingRpcs.get(resp.requestId); if (listener == null) { logger.warn("Ignoring response for RPC {} from {} ({}) since it is not outstanding", - MDC.of(LogKeys.REQUEST_ID$.MODULE$, resp.requestId), - MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel)), - MDC.of(LogKeys.ERROR$.MODULE$, resp.errorString)); + MDC.of(LogKeys.REQUEST_ID, resp.requestId), + MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel)), + MDC.of(LogKeys.ERROR, resp.errorString)); } else { outstandingRpcs.remove(resp.requestId); listener.onFailure(new RuntimeException(resp.errorString)); @@ -223,9 +222,9 @@ public void handle(ResponseMessage message) throws Exception { if (listener == null) { logger.warn("Ignoring response for MergedBlockMetaRequest {} from {} ({} bytes) since " + "it is not outstanding", - MDC.of(LogKeys.REQUEST_ID$.MODULE$, resp.requestId), - MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel)), - MDC.of(LogKeys.RESPONSE_BODY_SIZE$.MODULE$, resp.body().size())); + MDC.of(LogKeys.REQUEST_ID, resp.requestId), + MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel)), + MDC.of(LogKeys.RESPONSE_BODY_SIZE, resp.body().size())); } else { outstandingRpcs.remove(resp.requestId); listener.onSuccess(resp.getNumChunks(), resp.body()); @@ -236,7 +235,7 @@ public void handle(ResponseMessage message) throws Exception { } else if (message instanceof StreamResponse resp) { Pair entry = streamCallbacks.poll(); if (entry != null) { - StreamCallback callback = entry.getValue(); + StreamCallback callback = entry.getRight(); if (resp.byteCount > 0) { StreamInterceptor interceptor = new StreamInterceptor<>( this, resp.streamId, resp.byteCount, callback); @@ -262,7 +261,7 @@ public void handle(ResponseMessage message) throws Exception { } else if (message instanceof StreamFailure resp) { Pair entry = streamCallbacks.poll(); if (entry != null) { - StreamCallback callback = entry.getValue(); + StreamCallback callback = entry.getRight(); try { callback.onFailure(resp.streamId, new RuntimeException(resp.error)); } catch (IOException ioe) { @@ -270,7 +269,7 @@ public void handle(ResponseMessage message) throws Exception { } } else { logger.warn("Stream failure with unknown callback: {}", - MDC.of(LogKeys.ERROR$.MODULE$, resp.error)); + MDC.of(LogKeys.ERROR, resp.error)); } } else { throw new IllegalStateException("Unknown response type: " + message.type()); diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java index 8449a774a404a..f02fbc3aa26c7 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java +++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthEngine.java @@ -21,10 +21,10 @@ import java.io.Closeable; import java.security.GeneralSecurityException; import java.util.Arrays; +import java.util.Objects; import 
java.util.Properties; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import com.google.common.primitives.Bytes; import com.google.crypto.tink.subtle.AesGcmJce; import com.google.crypto.tink.subtle.Hkdf; @@ -33,6 +33,8 @@ import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import static java.nio.charset.StandardCharsets.UTF_8; + +import org.apache.spark.network.util.JavaUtils; import org.apache.spark.network.util.TransportConf; /** @@ -61,10 +63,8 @@ class AuthEngine implements Closeable { private TransportCipher sessionCipher; AuthEngine(String appId, String preSharedSecret, TransportConf conf) { - Preconditions.checkNotNull(appId); - Preconditions.checkNotNull(preSharedSecret); - this.appId = appId; - this.preSharedSecret = preSharedSecret.getBytes(UTF_8); + this.appId = Objects.requireNonNull(appId); + this.preSharedSecret = Objects.requireNonNull(preSharedSecret).getBytes(UTF_8); this.conf = conf; this.cryptoConf = conf.cryptoConf(); // This is for backward compatibility with version 1.0 of this protocol, @@ -126,7 +126,7 @@ private AuthMessage encryptEphemeralPublicKey( private byte[] decryptEphemeralPublicKey( AuthMessage encryptedPublicKey, byte[] transcript) throws GeneralSecurityException { - Preconditions.checkArgument(appId.equals(encryptedPublicKey.appId())); + JavaUtils.checkArgument(appId.equals(encryptedPublicKey.appId()), "appID is different."); // Mix in the app ID, salt, and transcript into HKDF and use it as AES-GCM AAD byte[] aadState = Bytes.concat(appId.getBytes(UTF_8), encryptedPublicKey.salt(), transcript); // Use HKDF to derive an AES_GCM key from the pre-shared key, non-secret salt, and AAD state @@ -162,7 +162,7 @@ AuthMessage challenge() throws GeneralSecurityException { * @return An encrypted server ephemeral public key to be sent to the client. */ AuthMessage response(AuthMessage encryptedClientPublicKey) throws GeneralSecurityException { - Preconditions.checkArgument(appId.equals(encryptedClientPublicKey.appId())); + JavaUtils.checkArgument(appId.equals(encryptedClientPublicKey.appId()), "appId is different."); // Compute a shared secret given the client public key and the server private key byte[] clientPublicKey = decryptEphemeralPublicKey(encryptedClientPublicKey, EMPTY_TRANSCRIPT); @@ -190,8 +190,7 @@ AuthMessage response(AuthMessage encryptedClientPublicKey) throws GeneralSecurit */ void deriveSessionCipher(AuthMessage encryptedClientPublicKey, AuthMessage encryptedServerPublicKey) throws GeneralSecurityException { - Preconditions.checkArgument(appId.equals(encryptedClientPublicKey.appId())); - Preconditions.checkArgument(appId.equals(encryptedServerPublicKey.appId())); + JavaUtils.checkArgument(appId.equals(encryptedClientPublicKey.appId()), "appId is different."); // Compute a shared secret given the server public key and the client private key, // mixing in the protocol transcript. byte[] serverPublicKey = decryptEphemeralPublicKey( @@ -252,7 +251,7 @@ private byte[] getTranscript(AuthMessage... 
encryptedPublicKeys) { } TransportCipher sessionCipher() { - Preconditions.checkState(sessionCipher != null); + JavaUtils.checkState(sessionCipher != null, "sessionCipher is null."); return sessionCipher; } diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthRpcHandler.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthRpcHandler.java index 087e3d21e22bb..8ce4680f32437 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthRpcHandler.java +++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/AuthRpcHandler.java @@ -20,8 +20,6 @@ import java.nio.ByteBuffer; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.base.Throwables; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.channel.Channel; @@ -36,6 +34,7 @@ import org.apache.spark.network.sasl.SaslRpcHandler; import org.apache.spark.network.server.AbstractAuthRpcHandler; import org.apache.spark.network.server.RpcHandler; +import org.apache.spark.network.util.JavaUtils; import org.apache.spark.network.util.TransportConf; /** @@ -93,7 +92,7 @@ protected boolean doAuthChallenge( } catch (RuntimeException e) { if (conf.saslFallback()) { LOG.warn("Failed to parse new auth challenge, reverting to SASL for client {}.", - MDC.of(LogKeys.HOST_PORT$.MODULE$, channel.remoteAddress())); + MDC.of(LogKeys.HOST_PORT, channel.remoteAddress())); saslHandler = new SaslRpcHandler(conf, channel, null, secretKeyHolder); message.position(position); message.limit(limit); @@ -111,7 +110,7 @@ protected boolean doAuthChallenge( AuthEngine engine = null; try { String secret = secretKeyHolder.getSecretKey(challenge.appId()); - Preconditions.checkState(secret != null, + JavaUtils.checkState(secret != null, "Trying to authenticate non-registered app %s.", challenge.appId()); LOG.debug("Authenticating challenge for app {}.", challenge.appId()); engine = new AuthEngine(challenge.appId(), secret, conf); @@ -132,7 +131,7 @@ protected boolean doAuthChallenge( try { engine.close(); } catch (Exception e) { - Throwables.throwIfUnchecked(e); + if (e instanceof RuntimeException re) throw re; throw new RuntimeException(e); } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/CtrTransportCipher.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/CtrTransportCipher.java index 85b893751b39c..de7d1ae5753d9 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/crypto/CtrTransportCipher.java +++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/CtrTransportCipher.java @@ -27,7 +27,6 @@ import javax.crypto.spec.IvParameterSpec; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.channel.*; @@ -37,6 +36,7 @@ import org.apache.spark.network.util.AbstractFileRegion; import org.apache.spark.network.util.ByteArrayReadableChannel; import org.apache.spark.network.util.ByteArrayWritableChannel; +import org.apache.spark.network.util.JavaUtils; /** * Cipher for encryption and decryption. 
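
Referring back to the TransportResponseHandler hunks above: commons-lang3's ImmutablePair/Pair is replaced by org.apache.spark.util.Pair, and the call sites move from getKey/getValue to getLeft/getRight. The sketch below only captures the shape that class appears to expose in this patch (Pair.of, getLeft, getRight); the real class may carry more API.

    // Sketch only: minimal stand-in for org.apache.spark.util.Pair as used above.
    record Pair<L, R>(L left, R right) {
        static <L, R> Pair<L, R> of(L left, R right) {
            return new Pair<>(left, right);
        }

        L getLeft() { return left; }
        R getRight() { return right; }

        public static void main(String[] args) {
            // was: streamCallbacks.offer(ImmutablePair.of(streamId, callback))
            Pair<String, Runnable> entry = Pair.of("stream-1", () -> System.out.println("onFailure"));
            System.out.println(entry.getLeft());   // was entry.getKey()
            entry.getRight().run();                // was entry.getValue()
        }
    }
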
@@ -239,7 +239,7 @@ static class EncryptedMessage extends AbstractFileRegion { Object msg, ByteArrayWritableChannel byteEncChannel, ByteArrayWritableChannel byteRawChannel) { - Preconditions.checkArgument(msg instanceof ByteBuf || msg instanceof FileRegion, + JavaUtils.checkArgument(msg instanceof ByteBuf || msg instanceof FileRegion, "Unrecognized message type: %s", msg.getClass().getName()); this.handler = handler; this.isByteBuf = msg instanceof ByteBuf; @@ -304,7 +304,7 @@ public boolean release(int decrement) { @Override public long transferTo(WritableByteChannel target, long position) throws IOException { - Preconditions.checkArgument(position == transferred(), "Invalid position."); + JavaUtils.checkArgument(position == transferred(), "Invalid position."); if (transferred == count) { return 0; diff --git a/common/network-common/src/main/java/org/apache/spark/network/crypto/GcmTransportCipher.java b/common/network-common/src/main/java/org/apache/spark/network/crypto/GcmTransportCipher.java index c3540838bef09..e1cf22a612ea4 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/crypto/GcmTransportCipher.java +++ b/common/network-common/src/main/java/org/apache/spark/network/crypto/GcmTransportCipher.java @@ -18,15 +18,16 @@ package org.apache.spark.network.crypto; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import com.google.common.primitives.Longs; import com.google.crypto.tink.subtle.*; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.channel.*; import io.netty.util.ReferenceCounted; + import org.apache.spark.network.util.AbstractFileRegion; import org.apache.spark.network.util.ByteBufferWriteableChannel; +import org.apache.spark.network.util.JavaUtils; import javax.crypto.spec.SecretKeySpec; import java.io.IOException; @@ -118,7 +119,7 @@ static class GcmEncryptedMessage extends AbstractFileRegion { Object plaintextMessage, ByteBuffer plaintextBuffer, ByteBuffer ciphertextBuffer) throws GeneralSecurityException { - Preconditions.checkArgument( + JavaUtils.checkArgument( plaintextMessage instanceof ByteBuf || plaintextMessage instanceof FileRegion, "Unrecognized message type: %s", plaintextMessage.getClass().getName()); this.plaintextMessage = plaintextMessage; @@ -221,10 +222,12 @@ public long transferTo(WritableByteChannel target, long position) throws IOExcep int readLimit = (int) Math.min(readableBytes, plaintextBuffer.remaining()); if (plaintextMessage instanceof ByteBuf byteBuf) { - Preconditions.checkState(0 == plaintextBuffer.position()); + JavaUtils.checkState(0 == plaintextBuffer.position(), + "plaintextBuffer.position is not 0"); plaintextBuffer.limit(readLimit); byteBuf.readBytes(plaintextBuffer); - Preconditions.checkState(readLimit == plaintextBuffer.position()); + JavaUtils.checkState(readLimit == plaintextBuffer.position(), + "plaintextBuffer.position should be equal to readLimit."); } else if (plaintextMessage instanceof FileRegion fileRegion) { ByteBufferWriteableChannel plaintextChannel = new ByteBufferWriteableChannel(plaintextBuffer); @@ -347,7 +350,7 @@ private boolean initalizeDecrypter(ByteBuf ciphertextNettyBuf) @Override public void channelRead(ChannelHandlerContext ctx, Object ciphertextMessage) throws GeneralSecurityException { - Preconditions.checkArgument(ciphertextMessage instanceof ByteBuf, + JavaUtils.checkArgument(ciphertextMessage instanceof ByteBuf, "Unrecognized message type: %s", ciphertextMessage.getClass().getName()); ByteBuf 
ciphertextNettyBuf = (ByteBuf) ciphertextMessage; diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/AbstractMessage.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/AbstractMessage.java index 2924218c2f08b..1170fd3f1ab33 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/AbstractMessage.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/AbstractMessage.java @@ -17,7 +17,7 @@ package org.apache.spark.network.protocol; -import com.google.common.base.Objects; +import java.util.Objects; import org.apache.spark.network.buffer.ManagedBuffer; @@ -48,7 +48,7 @@ public boolean isBodyInFrame() { } protected boolean equals(AbstractMessage other) { - return isBodyInFrame == other.isBodyInFrame && Objects.equal(body, other.body); + return isBodyInFrame == other.isBodyInFrame && Objects.equals(body, other.body); } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchFailure.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchFailure.java index cbad4c61b9b4a..736d8e6f5eea2 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchFailure.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchFailure.java @@ -20,8 +20,6 @@ import java.util.Objects; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; /** * Response to {@link ChunkFetchRequest} when there is an error fetching the chunk. @@ -70,9 +68,6 @@ public boolean equals(Object other) { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("streamChunkId", streamChunkId) - .append("errorString", errorString) - .toString(); + return "ChunkFetchFailure[streamChunkId=" + streamChunkId + ",errorString=" + errorString + "]"; } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchRequest.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchRequest.java index 2865388b3297c..cc042fdf76b77 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchRequest.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchRequest.java @@ -18,8 +18,6 @@ package org.apache.spark.network.protocol; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; /** * Request to fetch a sequence of a single chunk of a stream. 
This will correspond to a single @@ -64,8 +62,6 @@ public boolean equals(Object other) { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("streamChunkId", streamChunkId) - .toString(); + return "ChunkFetchRequest[streamChunkId=" + streamChunkId + "]"; } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchSuccess.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchSuccess.java index aa89b2062f626..948190e7a2d53 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchSuccess.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/ChunkFetchSuccess.java @@ -20,8 +20,6 @@ import java.util.Objects; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.buffer.NettyManagedBuffer; @@ -83,9 +81,6 @@ public boolean equals(Object other) { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("streamChunkId", streamChunkId) - .append("buffer", body()) - .toString(); + return "ChunkFetchSuccess[streamChunkId=" + streamChunkId + ",body=" + body() + "]"; } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/EncryptedMessageWithHeader.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/EncryptedMessageWithHeader.java index 321ac13881c2a..84917eca17190 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/EncryptedMessageWithHeader.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/EncryptedMessageWithHeader.java @@ -21,7 +21,6 @@ import java.io.InputStream; import javax.annotation.Nullable; -import com.google.common.base.Preconditions; import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufAllocator; import io.netty.channel.ChannelHandlerContext; @@ -29,6 +28,7 @@ import io.netty.handler.stream.ChunkedInput; import org.apache.spark.network.buffer.ManagedBuffer; +import org.apache.spark.network.util.JavaUtils; /** * A wrapper message that holds two separate pieces (a header and a body). 
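The protocol-message cleanups in this part of the patch follow one pattern: `com.google.common.base.Objects.equal`/`hashCode` become `java.util.Objects.equals`/`hash`, and `ToStringBuilder(SHORT_PREFIX_STYLE)`-based `toString()` methods become plain string concatenation. The self-contained example below shows that target style on a hypothetical `DemoChunkId` class (not part of the patch), so the before/after shape is easy to see in one place.

```java
import java.util.Objects;

// Hypothetical message-like class illustrating the JDK-only equals/hashCode/toString
// style that the protocol classes in this patch are migrated to.
final class DemoChunkId {
  private final long streamId;
  private final int chunkIndex;

  DemoChunkId(long streamId, int chunkIndex) {
    this.streamId = streamId;
    this.chunkIndex = chunkIndex;
  }

  @Override
  public int hashCode() {
    // java.util.Objects.hash replaces com.google.common.base.Objects.hashCode
    return Objects.hash(streamId, chunkIndex);
  }

  @Override
  public boolean equals(Object other) {
    return other instanceof DemoChunkId o
        && streamId == o.streamId && chunkIndex == o.chunkIndex;
  }

  @Override
  public String toString() {
    // Plain concatenation replaces ToStringBuilder with SHORT_PREFIX_STYLE
    return "DemoChunkId[streamId=" + streamId + ",chunkIndex=" + chunkIndex + "]";
  }

  public static void main(String[] args) {
    System.out.println(new DemoChunkId(7L, 0)); // DemoChunkId[streamId=7,chunkIndex=0]
  }
}
```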
@@ -60,7 +60,7 @@ public class EncryptedMessageWithHeader implements ChunkedInput { public EncryptedMessageWithHeader( @Nullable ManagedBuffer managedBuffer, ByteBuf header, Object body, long bodyLength) { - Preconditions.checkArgument(body instanceof InputStream || body instanceof ChunkedStream, + JavaUtils.checkArgument(body instanceof InputStream || body instanceof ChunkedStream, "Body must be an InputStream or a ChunkedStream."); this.managedBuffer = managedBuffer; this.header = header; diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/MergedBlockMetaRequest.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/MergedBlockMetaRequest.java index 3723730ebc06c..0b1476664f651 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/MergedBlockMetaRequest.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/MergedBlockMetaRequest.java @@ -17,10 +17,9 @@ package org.apache.spark.network.protocol; -import com.google.common.base.Objects; +import java.util.Objects; + import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; /** * Request to find the meta information for the specified merged block. The meta information @@ -79,7 +78,7 @@ public static MergedBlockMetaRequest decode(ByteBuf buf) { @Override public int hashCode() { - return Objects.hashCode(requestId, appId, shuffleId, shuffleMergeId, reduceId); + return Objects.hash(requestId, appId, shuffleId, shuffleMergeId, reduceId); } @Override @@ -87,19 +86,14 @@ public boolean equals(Object other) { if (other instanceof MergedBlockMetaRequest o) { return requestId == o.requestId && shuffleId == o.shuffleId && shuffleMergeId == o.shuffleMergeId && reduceId == o.reduceId && - Objects.equal(appId, o.appId); + Objects.equals(appId, o.appId); } return false; } @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("requestId", requestId) - .append("appId", appId) - .append("shuffleId", shuffleId) - .append("shuffleMergeId", shuffleMergeId) - .append("reduceId", reduceId) - .toString(); + return "MergedBlockMetaRequest[requestId=" + requestId + ",appId=" + appId + ",shuffleId=" + + shuffleId + ",shuffleMergeId=" + shuffleMergeId + ",reduceId=" + reduceId + "]"; } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/MergedBlockMetaSuccess.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/MergedBlockMetaSuccess.java index d2edaf4532e11..255174e34600c 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/MergedBlockMetaSuccess.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/MergedBlockMetaSuccess.java @@ -17,10 +17,9 @@ package org.apache.spark.network.protocol; -import com.google.common.base.Objects; +import java.util.Objects; + import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.buffer.NettyManagedBuffer; @@ -51,13 +50,12 @@ public Type type() { @Override public int hashCode() { - return Objects.hashCode(requestId, numChunks); + return Objects.hash(requestId, numChunks); } @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - 
.append("requestId", requestId).append("numChunks", numChunks).toString(); + return "MergedBlockMetaSuccess[requestId=" + requestId + ",numChunks=" + numChunks + "]"; } @Override diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageEncoder.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageEncoder.java index ab20fb908eb42..3f23c17939e6c 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageEncoder.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageEncoder.java @@ -66,8 +66,8 @@ public void encode(ChannelHandlerContext ctx, Message in, List out) thro // Re-encode this message as a failure response. String error = e.getMessage() != null ? e.getMessage() : "null"; logger.error("Error processing {} for client {}", e, - MDC.of(LogKeys.MESSAGE$.MODULE$, in), - MDC.of(LogKeys.HOST_PORT$.MODULE$, ctx.channel().remoteAddress())); + MDC.of(LogKeys.MESSAGE, in), + MDC.of(LogKeys.HOST_PORT, ctx.channel().remoteAddress())); encode(ctx, resp.createFailureResponse(error), out); } else { throw e; diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java index e8eb83e7577bf..993ce2381caa5 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/MessageWithHeader.java @@ -22,13 +22,13 @@ import java.nio.channels.WritableByteChannel; import javax.annotation.Nullable; -import com.google.common.base.Preconditions; import io.netty.buffer.ByteBuf; import io.netty.channel.FileRegion; import io.netty.util.ReferenceCountUtil; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.util.AbstractFileRegion; +import org.apache.spark.network.util.JavaUtils; /** * A wrapper message that holds two separate pieces (a header and a body). @@ -72,7 +72,7 @@ public class MessageWithHeader extends AbstractFileRegion { ByteBuf header, Object body, long bodyLength) { - Preconditions.checkArgument(body instanceof ByteBuf || body instanceof FileRegion, + JavaUtils.checkArgument(body instanceof ByteBuf || body instanceof FileRegion, "Body must be a ByteBuf or a FileRegion."); this.managedBuffer = managedBuffer; this.header = header; @@ -105,7 +105,7 @@ public long transferred() { */ @Override public long transferTo(final WritableByteChannel target, final long position) throws IOException { - Preconditions.checkArgument(position == totalBytesTransferred, "Invalid position."); + JavaUtils.checkArgument(position == totalBytesTransferred, "Invalid position."); // Bytes written for header in this call. 
long writtenHeader = 0; if (header.readableBytes() > 0) { diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/OneWayMessage.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/OneWayMessage.java index 91c818f3612a9..de1f91bc8d318 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/OneWayMessage.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/OneWayMessage.java @@ -20,8 +20,6 @@ import java.util.Objects; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.buffer.NettyManagedBuffer; @@ -74,8 +72,6 @@ public boolean equals(Object other) { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("body", body()) - .toString(); + return "OneWayMessage[body=" + body() + "]"; } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcFailure.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcFailure.java index 02a45d68c650e..f48264a494f0e 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcFailure.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcFailure.java @@ -20,8 +20,6 @@ import java.util.Objects; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; /** Response to {@link RpcRequest} for a failed RPC. */ public final class RpcFailure extends AbstractMessage implements ResponseMessage { @@ -68,9 +66,6 @@ public boolean equals(Object other) { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("requestId", requestId) - .append("errorString", errorString) - .toString(); + return "RpcFailure[requestId=" + requestId + ",errorString=" + errorString + "]"; } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcRequest.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcRequest.java index a7dbe1283b314..2619b176e331e 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcRequest.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcRequest.java @@ -20,8 +20,6 @@ import java.util.Objects; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.buffer.NettyManagedBuffer; @@ -80,9 +78,6 @@ public boolean equals(Object other) { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("requestId", requestId) - .append("body", body()) - .toString(); + return "RpcRequest[requestId=" + requestId + ",body=" + body() + "]"; } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcResponse.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcResponse.java index 85709e36f83ee..a9805bcf686b4 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcResponse.java +++ 
b/common/network-common/src/main/java/org/apache/spark/network/protocol/RpcResponse.java @@ -20,8 +20,6 @@ import java.util.Objects; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.buffer.NettyManagedBuffer; @@ -80,9 +78,6 @@ public boolean equals(Object other) { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("requestId", requestId) - .append("body", body()) - .toString(); + return "RpcResponse[requestId=" + requestId + ",body=" + body() + "]"; } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/SslMessageEncoder.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/SslMessageEncoder.java index abe6ccca7bfd6..083e45b3d6bdb 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/SslMessageEncoder.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/SslMessageEncoder.java @@ -71,8 +71,8 @@ public void encode(ChannelHandlerContext ctx, Message in, List out) thro // Re-encode this message as a failure response. String error = e.getMessage() != null ? e.getMessage() : "null"; logger.error("Error processing {} for client {}", e, - MDC.of(LogKeys.MESSAGE$.MODULE$, in), - MDC.of(LogKeys.HOST_PORT$.MODULE$, ctx.channel().remoteAddress())); + MDC.of(LogKeys.MESSAGE, in), + MDC.of(LogKeys.HOST_PORT, ctx.channel().remoteAddress())); encode(ctx, resp.createFailureResponse(error), out); } else { throw e; diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamChunkId.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamChunkId.java index c3b715009dffe..61aae3e36eceb 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamChunkId.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamChunkId.java @@ -20,8 +20,6 @@ import java.util.Objects; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; /** * Encapsulates a request for a particular chunk of a stream. @@ -61,9 +59,6 @@ public boolean equals(Object other) { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("streamId", streamId) - .append("chunkIndex", chunkIndex) - .toString(); + return "StreamChunkId[streamId=" + streamId + ",chunkIndex=" + chunkIndex + "]"; } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamFailure.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamFailure.java index 9a7bf2f65af3a..50cc25a4919a5 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamFailure.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamFailure.java @@ -20,8 +20,6 @@ import java.util.Objects; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; /** * Message indicating an error when transferring a stream. 
@@ -70,10 +68,7 @@ public boolean equals(Object other) { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("streamId", streamId) - .append("error", error) - .toString(); + return "StreamFailure[streamId=" + streamId + ",error=" + error + "]"; } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamRequest.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamRequest.java index 5906b4d380d6e..45ca2578b01a4 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamRequest.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamRequest.java @@ -20,8 +20,6 @@ import java.util.Objects; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; /** * Request to stream data from the remote end. @@ -69,9 +67,7 @@ public boolean equals(Object other) { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("streamId", streamId) - .toString(); + return "StreamRequest[streamId=" + streamId + "]"; } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamResponse.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamResponse.java index 0c0aa5c9a635b..d7c304e5c5b34 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamResponse.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/StreamResponse.java @@ -20,8 +20,6 @@ import java.util.Objects; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.buffer.ManagedBuffer; @@ -83,11 +81,8 @@ public boolean equals(Object other) { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("streamId", streamId) - .append("byteCount", byteCount) - .append("body", body()) - .toString(); + return "StreamResponse[streamId=" + streamId + ",byteCount=" + byteCount + + ",body=" + body() + "]"; } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/protocol/UploadStream.java b/common/network-common/src/main/java/org/apache/spark/network/protocol/UploadStream.java index 4722f39dfa9db..09baaf60c3f92 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/protocol/UploadStream.java +++ b/common/network-common/src/main/java/org/apache/spark/network/protocol/UploadStream.java @@ -21,8 +21,6 @@ import java.nio.ByteBuffer; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.buffer.NettyManagedBuffer; @@ -99,9 +97,6 @@ public boolean equals(Object other) { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("requestId", requestId) - .append("body", body()) - .toString(); + return "UploadStream[requestId=" + requestId + ",body=" + body() + "]"; } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslEncryption.java b/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslEncryption.java index 
e1275689ae6a0..1cdb951d2d04e 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslEncryption.java +++ b/common/network-common/src/main/java/org/apache/spark/network/sasl/SaslEncryption.java @@ -23,7 +23,6 @@ import java.util.List; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.channel.Channel; @@ -35,6 +34,7 @@ import org.apache.spark.network.util.AbstractFileRegion; import org.apache.spark.network.util.ByteArrayWritableChannel; +import org.apache.spark.network.util.JavaUtils; import org.apache.spark.network.util.NettyUtils; /** @@ -152,7 +152,7 @@ static class EncryptedMessage extends AbstractFileRegion { private long transferred; EncryptedMessage(SaslEncryptionBackend backend, Object msg, int maxOutboundBlockSize) { - Preconditions.checkArgument(msg instanceof ByteBuf || msg instanceof FileRegion, + JavaUtils.checkArgument(msg instanceof ByteBuf || msg instanceof FileRegion, "Unrecognized message type: %s", msg.getClass().getName()); this.backend = backend; this.isByteBuf = msg instanceof ByteBuf; @@ -241,7 +241,7 @@ public boolean release(int decrement) { public long transferTo(final WritableByteChannel target, final long position) throws IOException { - Preconditions.checkArgument(position == transferred(), "Invalid position."); + JavaUtils.checkArgument(position == transferred(), "Invalid position."); long reportedWritten = 0L; long actuallyWritten = 0L; diff --git a/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java b/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java index f32fd5145c7c5..24e01c924ef3a 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java +++ b/common/network-common/src/main/java/org/apache/spark/network/sasl/SparkSaslServer.java @@ -29,8 +29,8 @@ import javax.security.sasl.SaslServer; import java.nio.charset.StandardCharsets; import java.util.Map; +import java.util.Objects; -import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableMap; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; @@ -182,13 +182,13 @@ public void handle(Callback[] callbacks) throws UnsupportedCallbackException { /* Encode a byte[] identifier as a Base64-encoded string. */ public static String encodeIdentifier(String identifier) { - Preconditions.checkNotNull(identifier, "User cannot be null if SASL is enabled"); + Objects.requireNonNull(identifier, "User cannot be null if SASL is enabled"); return getBase64EncodedString(identifier); } /** Encode a password as a base64-encoded char[] array. 
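The `Preconditions.checkNotNull` calls in this area become `java.util.Objects.requireNonNull`, which covers both usage patterns in the patch: a bare check with a message, and a check folded into a return or assignment, since `requireNonNull` returns its argument when it is non-null. A small runnable example of both forms:

```java
import java.util.Objects;

// java.util.Objects.requireNonNull as a drop-in for Guava's Preconditions.checkNotNull:
// it throws NullPointerException (optionally with a message) and returns the argument,
// so the check can be inlined into a constructor assignment or a return statement.
final class RequireNonNullDemo {
  private final String identifier;

  RequireNonNullDemo(String identifier) {
    this.identifier =
        Objects.requireNonNull(identifier, "User cannot be null if SASL is enabled");
  }

  String identifier() {
    return Objects.requireNonNull(identifier); // fails fast if somehow unset
  }

  public static void main(String[] args) {
    System.out.println(new RequireNonNullDemo("alice").identifier());
    try {
      new RequireNonNullDemo(null);
    } catch (NullPointerException expected) {
      System.out.println("rejected: " + expected.getMessage());
    }
  }
}
```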
*/ public static char[] encodePassword(String password) { - Preconditions.checkNotNull(password, "Password cannot be null if SASL is enabled"); + Objects.requireNonNull(password, "Password cannot be null if SASL is enabled"); return getBase64EncodedString(password).toCharArray(); } diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/BlockPushNonFatalFailure.java b/common/network-common/src/main/java/org/apache/spark/network/server/BlockPushNonFatalFailure.java index f60a74670d149..a0e9305265385 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/server/BlockPushNonFatalFailure.java +++ b/common/network-common/src/main/java/org/apache/spark/network/server/BlockPushNonFatalFailure.java @@ -18,8 +18,9 @@ package org.apache.spark.network.server; import java.nio.ByteBuffer; +import java.util.Objects; -import com.google.common.base.Preconditions; +import org.apache.spark.network.util.JavaUtils; /** * A special RuntimeException thrown when shuffle service experiences a non-fatal failure @@ -101,14 +102,12 @@ public synchronized Throwable fillInStackTrace() { public ByteBuffer getResponse() { // Ensure we do not invoke this method if response is not set - Preconditions.checkNotNull(response); - return response; + return Objects.requireNonNull(response); } public ReturnCode getReturnCode() { // Ensure we do not invoke this method if returnCode is not set - Preconditions.checkNotNull(returnCode); - return returnCode; + return Objects.requireNonNull(returnCode); } public enum ReturnCode { @@ -171,7 +170,7 @@ public static boolean shouldNotRetryErrorCode(ReturnCode returnCode) { } public static String getErrorMsg(String blockId, ReturnCode errorCode) { - Preconditions.checkArgument(errorCode != ReturnCode.SUCCESS); + JavaUtils.checkArgument(errorCode != ReturnCode.SUCCESS, "errorCode should not be SUCCESS."); return "Block " + blockId + errorCode.errorMsgSuffix; } } diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/ChunkFetchRequestHandler.java b/common/network-common/src/main/java/org/apache/spark/network/server/ChunkFetchRequestHandler.java index cc0bed7ed5b6d..c7d4d671dec7d 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/server/ChunkFetchRequestHandler.java +++ b/common/network-common/src/main/java/org/apache/spark/network/server/ChunkFetchRequestHandler.java @@ -19,7 +19,6 @@ import java.net.SocketAddress; -import com.google.common.base.Throwables; import io.netty.channel.Channel; import io.netty.channel.ChannelFuture; import io.netty.channel.ChannelFutureListener; @@ -36,6 +35,7 @@ import org.apache.spark.network.protocol.ChunkFetchRequest; import org.apache.spark.network.protocol.ChunkFetchSuccess; import org.apache.spark.network.protocol.Encodable; +import org.apache.spark.network.util.JavaUtils; import static org.apache.spark.network.util.NettyUtils.*; @@ -74,7 +74,7 @@ public ChunkFetchRequestHandler( @Override public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception { logger.warn("Exception in connection from {}", cause, - MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(ctx.channel()))); + MDC.of(LogKeys.HOST_PORT, getRemoteAddress(ctx.channel()))); ctx.close(); } @@ -96,8 +96,8 @@ public void processFetchRequest( long chunksBeingTransferred = streamManager.chunksBeingTransferred(); if (chunksBeingTransferred >= maxChunksBeingTransferred) { logger.warn("The number of chunks being transferred {} is above {}, close the connection.", - 
MDC.of(LogKeys.NUM_CHUNKS$.MODULE$, chunksBeingTransferred), - MDC.of(LogKeys.MAX_NUM_CHUNKS$.MODULE$, maxChunksBeingTransferred)); + MDC.of(LogKeys.NUM_CHUNKS, chunksBeingTransferred), + MDC.of(LogKeys.MAX_NUM_CHUNKS, maxChunksBeingTransferred)); channel.close(); return; } @@ -111,10 +111,10 @@ public void processFetchRequest( } } catch (Exception e) { logger.error("Error opening block {} for request from {}", e, - MDC.of(LogKeys.STREAM_CHUNK_ID$.MODULE$, msg.streamChunkId), - MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel))); + MDC.of(LogKeys.STREAM_CHUNK_ID, msg.streamChunkId), + MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel))); respond(channel, new ChunkFetchFailure(msg.streamChunkId, - Throwables.getStackTraceAsString(e))); + JavaUtils.stackTraceToString(e))); return; } @@ -153,8 +153,8 @@ private ChannelFuture respond( } else { logger.error("Error sending result {} to {}; closing connection", future.cause(), - MDC.of(LogKeys.RESULT$.MODULE$, result), - MDC.of(LogKeys.HOST_PORT$.MODULE$, remoteAddress)); + MDC.of(LogKeys.RESULT, result), + MDC.of(LogKeys.HOST_PORT, remoteAddress)); channel.close(); } }); diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/OneForOneStreamManager.java b/common/network-common/src/main/java/org/apache/spark/network/server/OneForOneStreamManager.java index f322293782dee..cb53d565e7e87 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/server/OneForOneStreamManager.java +++ b/common/network-common/src/main/java/org/apache/spark/network/server/OneForOneStreamManager.java @@ -19,20 +19,20 @@ import java.util.Iterator; import java.util.Map; +import java.util.Objects; import java.util.Random; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicLong; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import io.netty.channel.Channel; -import org.apache.commons.lang3.tuple.ImmutablePair; -import org.apache.commons.lang3.tuple.Pair; import org.apache.spark.internal.SparkLogger; import org.apache.spark.internal.SparkLoggerFactory; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.client.TransportClient; +import org.apache.spark.network.util.JavaUtils; +import org.apache.spark.util.Pair; /** * StreamManager which allows registration of an Iterator<ManagedBuffer>, which are @@ -72,7 +72,7 @@ private static class StreamState { Channel channel, boolean isBufferMaterializedOnNext) { this.appId = appId; - this.buffers = Preconditions.checkNotNull(buffers); + this.buffers = Objects.requireNonNull(buffers); this.associatedChannel = channel; this.isBufferMaterializedOnNext = isBufferMaterializedOnNext; } @@ -127,7 +127,7 @@ public static Pair parseStreamChunkId(String streamChunkId) { "Stream id and chunk index should be specified."; long streamId = Long.valueOf(array[0]); int chunkIndex = Integer.valueOf(array[1]); - return ImmutablePair.of(streamId, chunkIndex); + return Pair.of(streamId, chunkIndex); } @Override @@ -167,7 +167,7 @@ public void connectionTerminated(Channel channel) { public void checkAuthorization(TransportClient client, long streamId) { if (client.getClientId() != null) { StreamState state = streams.get(streamId); - Preconditions.checkArgument(state != null, "Unknown stream ID."); + JavaUtils.checkArgument(state != null, "Unknown stream ID."); if (!client.getClientId().equals(state.appId)) { throw new SecurityException(String.format( "Client %s not authorized to 
read stream %d (app %s).", diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java b/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java index 283f0f0a431fd..d0df24873cbce 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java +++ b/common/network-common/src/main/java/org/apache/spark/network/server/TransportChannelHandler.java @@ -88,7 +88,7 @@ public TransportClient getClient() { @Override public void exceptionCaught(ChannelHandlerContext ctx, Throwable cause) throws Exception { logger.warn("Exception in connection from {}", cause, - MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(ctx.channel()))); + MDC.of(LogKeys.HOST_PORT, getRemoteAddress(ctx.channel()))); requestHandler.exceptionCaught(cause); responseHandler.exceptionCaught(cause); ctx.close(); @@ -168,9 +168,9 @@ public void userEventTriggered(ChannelHandlerContext ctx, Object evt) throws Exc logger.error("Connection to {} has been quiet for {} ms while there are outstanding " + "requests. Assuming connection is dead; please adjust" + " spark.{}.io.connectionTimeout if this is wrong.", - MDC.of(LogKeys.HOST_PORT$.MODULE$, address), - MDC.of(LogKeys.TIMEOUT$.MODULE$, requestTimeoutNs / 1000 / 1000), - MDC.of(LogKeys.MODULE_NAME$.MODULE$, transportContext.getConf().getModuleName())); + MDC.of(LogKeys.HOST_PORT, address), + MDC.of(LogKeys.TIMEOUT, requestTimeoutNs / 1000 / 1000), + MDC.of(LogKeys.MODULE_NAME, transportContext.getConf().getModuleName())); client.timeOut(); ctx.close(); } else if (closeIdleConnections) { diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/TransportRequestHandler.java b/common/network-common/src/main/java/org/apache/spark/network/server/TransportRequestHandler.java index 2727051894b7a..464d4d9eb378f 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/server/TransportRequestHandler.java +++ b/common/network-common/src/main/java/org/apache/spark/network/server/TransportRequestHandler.java @@ -21,7 +21,6 @@ import java.net.SocketAddress; import java.nio.ByteBuffer; -import com.google.common.base.Throwables; import io.netty.channel.Channel; import io.netty.channel.ChannelFuture; @@ -33,6 +32,7 @@ import org.apache.spark.network.buffer.NioManagedBuffer; import org.apache.spark.network.client.*; import org.apache.spark.network.protocol.*; +import org.apache.spark.network.util.JavaUtils; import org.apache.spark.network.util.TransportFrameDecoder; import static org.apache.spark.network.util.NettyUtils.getRemoteAddress; @@ -132,8 +132,8 @@ private void processStreamRequest(final StreamRequest req) { long chunksBeingTransferred = streamManager.chunksBeingTransferred(); if (chunksBeingTransferred >= maxChunksBeingTransferred) { logger.warn("The number of chunks being transferred {} is above {}, close the connection.", - MDC.of(LogKeys.NUM_CHUNKS$.MODULE$, chunksBeingTransferred), - MDC.of(LogKeys.MAX_NUM_CHUNKS$.MODULE$, maxChunksBeingTransferred)); + MDC.of(LogKeys.NUM_CHUNKS, chunksBeingTransferred), + MDC.of(LogKeys.MAX_NUM_CHUNKS, maxChunksBeingTransferred)); channel.close(); return; } @@ -143,9 +143,9 @@ private void processStreamRequest(final StreamRequest req) { buf = streamManager.openStream(req.streamId); } catch (Exception e) { logger.error("Error opening stream {} for request from {}", e, - MDC.of(LogKeys.STREAM_ID$.MODULE$, req.streamId), - MDC.of(LogKeys.HOST_PORT$.MODULE$, 
getRemoteAddress(channel))); - respond(new StreamFailure(req.streamId, Throwables.getStackTraceAsString(e))); + MDC.of(LogKeys.STREAM_ID, req.streamId), + MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel))); + respond(new StreamFailure(req.streamId, JavaUtils.stackTraceToString(e))); return; } @@ -172,14 +172,14 @@ public void onSuccess(ByteBuffer response) { @Override public void onFailure(Throwable e) { - respond(new RpcFailure(req.requestId, Throwables.getStackTraceAsString(e))); + respond(new RpcFailure(req.requestId, JavaUtils.stackTraceToString(e))); } }); } catch (Exception e) { logger.error("Error while invoking RpcHandler#receive() on RPC id {} from {}", e, - MDC.of(LogKeys.REQUEST_ID$.MODULE$, req.requestId), - MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel))); - respond(new RpcFailure(req.requestId, Throwables.getStackTraceAsString(e))); + MDC.of(LogKeys.REQUEST_ID, req.requestId), + MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel))); + respond(new RpcFailure(req.requestId, JavaUtils.stackTraceToString(e))); } finally { req.body().release(); } @@ -199,7 +199,7 @@ public void onSuccess(ByteBuffer response) { @Override public void onFailure(Throwable e) { - respond(new RpcFailure(req.requestId, Throwables.getStackTraceAsString(e))); + respond(new RpcFailure(req.requestId, JavaUtils.stackTraceToString(e))); } }; TransportFrameDecoder frameDecoder = (TransportFrameDecoder) @@ -264,9 +264,9 @@ public String getID() { new NioManagedBuffer(blockPushNonFatalFailure.getResponse()))); } else { logger.error("Error while invoking RpcHandler#receive() on RPC id {} from {}", e, - MDC.of(LogKeys.REQUEST_ID$.MODULE$, req.requestId), - MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel))); - respond(new RpcFailure(req.requestId, Throwables.getStackTraceAsString(e))); + MDC.of(LogKeys.REQUEST_ID, req.requestId), + MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel))); + respond(new RpcFailure(req.requestId, JavaUtils.stackTraceToString(e))); } // We choose to totally fail the channel, rather than trying to recover as we do in other // cases. 
We don't know how many bytes of the stream the client has already sent for the @@ -282,7 +282,7 @@ private void processOneWayMessage(OneWayMessage req) { rpcHandler.receive(reverseClient, req.body().nioByteBuffer()); } catch (Exception e) { logger.error("Error while invoking RpcHandler#receive() for one-way message from {}.", e, - MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel))); + MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel))); } finally { req.body().release(); } @@ -302,16 +302,16 @@ public void onSuccess(int numChunks, ManagedBuffer buffer) { @Override public void onFailure(Throwable e) { logger.trace("Failed to send meta for {}", req); - respond(new RpcFailure(req.requestId, Throwables.getStackTraceAsString(e))); + respond(new RpcFailure(req.requestId, JavaUtils.stackTraceToString(e))); } }); } catch (Exception e) { logger.error("Error while invoking receiveMergeBlockMetaReq() for appId {} shuffleId {} " - + "reduceId {} from {}", e, MDC.of(LogKeys.APP_ID$.MODULE$, req.appId), - MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, req.shuffleId), - MDC.of(LogKeys.REDUCE_ID$.MODULE$, req.reduceId), - MDC.of(LogKeys.HOST_PORT$.MODULE$, getRemoteAddress(channel))); - respond(new RpcFailure(req.requestId, Throwables.getStackTraceAsString(e))); + + "reduceId {} from {}", e, MDC.of(LogKeys.APP_ID, req.appId), + MDC.of(LogKeys.SHUFFLE_ID, req.shuffleId), + MDC.of(LogKeys.REDUCE_ID, req.reduceId), + MDC.of(LogKeys.HOST_PORT, getRemoteAddress(channel))); + respond(new RpcFailure(req.requestId, JavaUtils.stackTraceToString(e))); } } @@ -326,8 +326,8 @@ private ChannelFuture respond(Encodable result) { logger.trace("Sent result {} to client {}", result, remoteAddress); } else { logger.error("Error sending result {} to {}; closing connection", future.cause(), - MDC.of(LogKeys.RESULT$.MODULE$, result), - MDC.of(LogKeys.HOST_PORT$.MODULE$, remoteAddress)); + MDC.of(LogKeys.RESULT, result), + MDC.of(LogKeys.HOST_PORT, remoteAddress)); channel.close(); } }); diff --git a/common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java b/common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java index d1a19652f5649..be5d9e03c45c1 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java +++ b/common/network-common/src/main/java/org/apache/spark/network/server/TransportServer.java @@ -19,13 +19,13 @@ import java.io.Closeable; import java.net.InetSocketAddress; +import java.util.ArrayList; import java.util.List; +import java.util.Objects; import java.util.concurrent.TimeUnit; import com.codahale.metrics.Counter; import com.codahale.metrics.MetricSet; -import com.google.common.base.Preconditions; -import com.google.common.collect.Lists; import io.netty.bootstrap.ServerBootstrap; import io.netty.buffer.PooledByteBufAllocator; import io.netty.channel.ChannelFuture; @@ -33,7 +33,6 @@ import io.netty.channel.ChannelOption; import io.netty.channel.EventLoopGroup; import io.netty.channel.socket.SocketChannel; -import org.apache.commons.lang3.SystemUtils; import org.apache.spark.internal.SparkLogger; import org.apache.spark.internal.SparkLoggerFactory; @@ -77,7 +76,7 @@ public TransportServer( this.pooledAllocator = NettyUtils.createPooledByteBufAllocator( conf.preferDirectBufs(), true /* allowCache */, conf.serverThreads()); } - this.bootstraps = Lists.newArrayList(Preconditions.checkNotNull(bootstraps)); + this.bootstraps = new ArrayList<>(Objects.requireNonNull(bootstraps)); boolean shouldClose = true; try { 
@@ -105,11 +104,13 @@ private void init(String hostToBind, int portToBind) { EventLoopGroup workerGroup = NettyUtils.createEventLoop(ioMode, conf.serverThreads(), conf.getModuleName() + "-server"); + String name = System.getProperty("os.name"); + boolean isNotWindows = !name.regionMatches(true, 0, "Windows", 0, 7); bootstrap = new ServerBootstrap() .group(bossGroup, workerGroup) .channel(NettyUtils.getServerChannelClass(ioMode)) .option(ChannelOption.ALLOCATOR, pooledAllocator) - .option(ChannelOption.SO_REUSEADDR, !SystemUtils.IS_OS_WINDOWS) + .option(ChannelOption.SO_REUSEADDR, isNotWindows) .childOption(ChannelOption.ALLOCATOR, pooledAllocator); this.metrics = new NettyMemoryMetrics( diff --git a/common/network-common/src/main/java/org/apache/spark/network/ssl/SSLFactory.java b/common/network-common/src/main/java/org/apache/spark/network/ssl/SSLFactory.java index a2e42e3eb39f6..f4b245ca7b128 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/ssl/SSLFactory.java +++ b/common/network-common/src/main/java/org/apache/spark/network/ssl/SSLFactory.java @@ -20,6 +20,7 @@ import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; +import java.nio.file.Files; import java.security.GeneralSecurityException; import java.security.KeyStore; import java.security.KeyStoreException; @@ -41,8 +42,6 @@ import javax.net.ssl.TrustManagerFactory; import javax.net.ssl.X509TrustManager; -import com.google.common.io.Files; - import io.netty.buffer.ByteBufAllocator; import io.netty.handler.ssl.OpenSsl; import io.netty.handler.ssl.SslContext; @@ -378,7 +377,7 @@ private static TrustManager[] trustStoreManagers( private static TrustManager[] defaultTrustManagers(File trustStore, String trustStorePassword) throws IOException, KeyStoreException, CertificateException, NoSuchAlgorithmException { - try (InputStream input = Files.asByteSource(trustStore).openStream()) { + try (InputStream input = Files.newInputStream(trustStore.toPath())) { KeyStore ks = KeyStore.getInstance(KeyStore.getDefaultType()); char[] passwordCharacters = trustStorePassword != null? trustStorePassword.toCharArray() : null; diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/IOMode.java b/common/network-common/src/main/java/org/apache/spark/network/util/IOMode.java index 6b208d95bbfbc..6ab401b9a0d5a 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/IOMode.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/IOMode.java @@ -19,9 +19,18 @@ /** * Selector for which form of low-level IO we should use. - * NIO is always available, while EPOLL is only available on Linux. - * AUTO is used to select EPOLL if it's available, or NIO otherwise. 
*/ public enum IOMode { - NIO, EPOLL + /** + * Java NIO (Selector), cross-platform portable + */ + NIO, + /** + * Native EPOLL via JNI, Linux only + */ + EPOLL, + /** + * Native KQUEUE via JNI, MacOS/BSD only + */ + KQUEUE } diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/LevelDBProvider.java b/common/network-common/src/main/java/org/apache/spark/network/util/LevelDBProvider.java index 391931961a474..ec3e032102e4f 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/LevelDBProvider.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/LevelDBProvider.java @@ -50,7 +50,7 @@ public static DB initLevelDB(File dbFile, StoreVersion version, ObjectMapper map tmpDb = JniDBFactory.factory.open(dbFile, options); } catch (NativeDB.DBException e) { if (e.isNotFound() || e.getMessage().contains(" does not exist ")) { - logger.info("Creating state database at {}", MDC.of(LogKeys.PATH$.MODULE$, dbFile)); + logger.info("Creating state database at {}", MDC.of(LogKeys.PATH, dbFile)); options.createIfMissing(true); try { tmpDb = JniDBFactory.factory.open(dbFile, options); @@ -61,16 +61,16 @@ public static DB initLevelDB(File dbFile, StoreVersion version, ObjectMapper map // the leveldb file seems to be corrupt somehow. Lets just blow it away and create a new // one, so we can keep processing new apps logger.error("error opening leveldb file {}. Creating new file, will not be able to " + - "recover state for existing applications", e, MDC.of(LogKeys.PATH$.MODULE$, dbFile)); + "recover state for existing applications", e, MDC.of(LogKeys.PATH, dbFile)); if (dbFile.isDirectory()) { for (File f : dbFile.listFiles()) { if (!f.delete()) { - logger.warn("error deleting {}", MDC.of(LogKeys.PATH$.MODULE$, f.getPath())); + logger.warn("error deleting {}", MDC.of(LogKeys.PATH, f.getPath())); } } } if (!dbFile.delete()) { - logger.warn("error deleting {}", MDC.of(LogKeys.PATH$.MODULE$, dbFile.getPath())); + logger.warn("error deleting {}", MDC.of(LogKeys.PATH, dbFile.getPath())); } options.createIfMissing(true); try { diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/LimitedInputStream.java b/common/network-common/src/main/java/org/apache/spark/network/util/LimitedInputStream.java index e6cf02a590e29..79cf0eb7c6153 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/LimitedInputStream.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/LimitedInputStream.java @@ -21,8 +21,7 @@ import java.io.FilterInputStream; import java.io.IOException; import java.io.InputStream; - -import com.google.common.base.Preconditions; +import java.util.Objects; /** * Wraps a {@link InputStream}, limiting the number of bytes which can be read. 
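The `KQUEUE` constant added to `IOMode` above is wired into `NettyUtils` a little further down, where each mode maps to a Netty event-loop group and channel class. The standalone sketch below mirrors that selection logic; it assumes the `io.netty:netty-transport-native-kqueue` (and `-epoll`) artifacts are available on the classpath, a build detail not visible in this hunk, and uses `NIO` in `main` so it runs on any platform.

```java
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadFactory;

import io.netty.channel.Channel;
import io.netty.channel.EventLoopGroup;
import io.netty.channel.epoll.EpollEventLoopGroup;
import io.netty.channel.epoll.EpollSocketChannel;
import io.netty.channel.kqueue.KQueueEventLoopGroup;
import io.netty.channel.kqueue.KQueueSocketChannel;
import io.netty.channel.nio.NioEventLoopGroup;
import io.netty.channel.socket.nio.NioSocketChannel;

// Standalone mirror of the IOMode -> Netty transport mapping added in this patch.
// NIO is portable, EPOLL needs the Linux native transport, KQUEUE the macOS/BSD one.
final class IoModeSelectionSketch {
  enum IOMode { NIO, EPOLL, KQUEUE }

  static EventLoopGroup createEventLoop(IOMode mode, int threads, ThreadFactory tf) {
    return switch (mode) {
      case NIO -> new NioEventLoopGroup(threads, tf);
      case EPOLL -> new EpollEventLoopGroup(threads, tf);
      case KQUEUE -> new KQueueEventLoopGroup(threads, tf);
    };
  }

  static Class<? extends Channel> clientChannelClass(IOMode mode) {
    return switch (mode) {
      case NIO -> NioSocketChannel.class;
      case EPOLL -> EpollSocketChannel.class;
      case KQUEUE -> KQueueSocketChannel.class;
    };
  }

  public static void main(String[] args) {
    ThreadFactory tf = Executors.defaultThreadFactory();
    EventLoopGroup group = createEventLoop(IOMode.NIO, 1, tf);
    System.out.println(clientChannelClass(IOMode.NIO).getSimpleName()); // NioSocketChannel
    group.shutdownGracefully();
  }
}
```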
@@ -50,10 +49,9 @@ public LimitedInputStream(InputStream in, long limit) { * @param closeWrappedStream whether to close {@code in} when {@link #close} is called */ public LimitedInputStream(InputStream in, long limit, boolean closeWrappedStream) { - super(in); + super(Objects.requireNonNull(in)); this.closeWrappedStream = closeWrappedStream; - Preconditions.checkNotNull(in); - Preconditions.checkArgument(limit >= 0, "limit must be non-negative"); + JavaUtils.checkArgument(limit >= 0, "limit must be non-negative"); left = limit; } @Override public int available() throws IOException { diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/NettyUtils.java b/common/network-common/src/main/java/org/apache/spark/network/util/NettyUtils.java index 2dd1c8f2e4a7d..da4b3109bbe1e 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/NettyUtils.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/NettyUtils.java @@ -26,6 +26,9 @@ import io.netty.channel.epoll.EpollEventLoopGroup; import io.netty.channel.epoll.EpollServerSocketChannel; import io.netty.channel.epoll.EpollSocketChannel; +import io.netty.channel.kqueue.KQueueEventLoopGroup; +import io.netty.channel.kqueue.KQueueServerSocketChannel; +import io.netty.channel.kqueue.KQueueSocketChannel; import io.netty.channel.nio.NioEventLoopGroup; import io.netty.channel.socket.nio.NioServerSocketChannel; import io.netty.channel.socket.nio.NioSocketChannel; @@ -68,6 +71,7 @@ public static EventLoopGroup createEventLoop(IOMode mode, int numThreads, String return switch (mode) { case NIO -> new NioEventLoopGroup(numThreads, threadFactory); case EPOLL -> new EpollEventLoopGroup(numThreads, threadFactory); + case KQUEUE -> new KQueueEventLoopGroup(numThreads, threadFactory); }; } @@ -76,6 +80,7 @@ public static Class getClientChannelClass(IOMode mode) { return switch (mode) { case NIO -> NioSocketChannel.class; case EPOLL -> EpollSocketChannel.class; + case KQUEUE -> KQueueSocketChannel.class; }; } @@ -84,6 +89,7 @@ public static Class getServerChannelClass(IOMode mode) return switch (mode) { case NIO -> NioServerSocketChannel.class; case EPOLL -> EpollServerSocketChannel.class; + case KQUEUE -> KQueueServerSocketChannel.class; }; } diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/RocksDBProvider.java b/common/network-common/src/main/java/org/apache/spark/network/util/RocksDBProvider.java index 1753c124c9935..cea9207d3470a 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/RocksDBProvider.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/RocksDBProvider.java @@ -67,7 +67,7 @@ public static RocksDB initRockDB(File dbFile, StoreVersion version, ObjectMapper tmpDb = RocksDB.open(dbOptions, dbFile.toString()); } catch (RocksDBException e) { if (e.getStatus().getCode() == Status.Code.NotFound) { - logger.info("Creating state database at {}", MDC.of(LogKeys.PATH$.MODULE$, dbFile)); + logger.info("Creating state database at {}", MDC.of(LogKeys.PATH, dbFile)); dbOptions.setCreateIfMissing(true); try { tmpDb = RocksDB.open(dbOptions, dbFile.toString()); @@ -78,16 +78,16 @@ public static RocksDB initRockDB(File dbFile, StoreVersion version, ObjectMapper // the RocksDB file seems to be corrupt somehow. Let's just blow it away and create // a new one, so we can keep processing new apps logger.error("error opening rocksdb file {}. 
Creating new file, will not be able to " + - "recover state for existing applications", e, MDC.of(LogKeys.PATH$.MODULE$, dbFile)); + "recover state for existing applications", e, MDC.of(LogKeys.PATH, dbFile)); if (dbFile.isDirectory()) { for (File f : Objects.requireNonNull(dbFile.listFiles())) { if (!f.delete()) { - logger.warn("error deleting {}", MDC.of(LogKeys.PATH$.MODULE$, f.getPath())); + logger.warn("error deleting {}", MDC.of(LogKeys.PATH, f.getPath())); } } } if (!dbFile.delete()) { - logger.warn("error deleting {}", MDC.of(LogKeys.PATH$.MODULE$, dbFile.getPath())); + logger.warn("error deleting {}", MDC.of(LogKeys.PATH, dbFile.getPath())); } dbOptions.setCreateIfMissing(true); try { diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java index 822b8aa310a22..003e72edf29ee 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/TransportConf.java @@ -21,8 +21,6 @@ import java.util.Locale; import java.util.Properties; import java.util.concurrent.TimeUnit; -import com.google.common.base.Preconditions; -import com.google.common.primitives.Ints; import io.netty.util.NettyRuntime; /** @@ -171,7 +169,7 @@ public int ioRetryWaitTimeMs() { * memory mapping has high overhead for blocks close to or below the page size of the OS. */ public int memoryMapBytes() { - return Ints.checkedCast(JavaUtils.byteStringAsBytes( + return JavaUtils.checkedCast(JavaUtils.byteStringAsBytes( conf.get("spark.storage.memoryMapThreshold", "2m"))); } @@ -248,7 +246,7 @@ public boolean saslEncryption() { * Maximum number of bytes to be encrypted at a time when SASL encryption is used. */ public int maxSaslEncryptedBlockSize() { - return Ints.checkedCast(JavaUtils.byteStringAsBytes( + return JavaUtils.checkedCast(JavaUtils.byteStringAsBytes( conf.get("spark.network.sasl.maxEncryptedBlockSize", "64k"))); } @@ -263,7 +261,7 @@ public boolean saslServerAlwaysEncrypt() { * When Secure (SSL/TLS) Shuffle is enabled, the Chunk size to use for shuffling files. */ public int sslShuffleChunkSize() { - return Ints.checkedCast(JavaUtils.byteStringAsBytes( + return JavaUtils.checkedCast(JavaUtils.byteStringAsBytes( conf.get("spark.network.ssl.maxEncryptedBlockSize", "64k"))); } @@ -504,7 +502,7 @@ public int finalizeShuffleMergeHandlerThreads() { if (!this.getModuleName().equalsIgnoreCase("shuffle")) { return 0; } - Preconditions.checkArgument(separateFinalizeShuffleMerge(), + JavaUtils.checkArgument(separateFinalizeShuffleMerge(), "Please set spark.shuffle.server.finalizeShuffleMergeThreadsPercent to a positive value"); int finalizeShuffleMergeThreadsPercent = Integer.parseInt(conf.get("spark.shuffle.server.finalizeShuffleMergeThreadsPercent")); @@ -567,7 +565,7 @@ public String mergedShuffleFileManagerImpl() { * service unnecessarily. 
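Guava's `Ints.checkedCast` is replaced with `JavaUtils.checkedCast` throughout `TransportConf` here and just below. Assuming the replacement keeps Guava's contract (return the `int` value if it fits, throw instead of silently truncating), its behavior can be pictured with the sketch below; the JDK's `Math.toIntExact` provides the same overflow check.

```java
// Sketch of a checked long -> int cast with the contract assumed for
// JavaUtils.checkedCast in this patch: return the value when it fits in an int,
// otherwise fail loudly rather than truncating a configured byte size.
final class CheckedCastSketch {
  static int checkedCast(long value) {
    int result = (int) value;
    if (result != value) {
      throw new IllegalArgumentException("Out of int range: " + value);
    }
    return result;
  }

  public static void main(String[] args) {
    System.out.println(checkedCast(2 * 1024 * 1024));   // 2097152, e.g. a "2m" setting
    System.out.println(Math.toIntExact(64 * 1024));      // JDK equivalent, 65536
    try {
      checkedCast(Long.MAX_VALUE);
    } catch (IllegalArgumentException expected) {
      System.out.println("rejected: " + expected.getMessage());
    }
  }
}
```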
*/ public int minChunkSizeInMergedShuffleFile() { - return Ints.checkedCast(JavaUtils.byteStringAsBytes( + return JavaUtils.checkedCast(JavaUtils.byteStringAsBytes( conf.get("spark.shuffle.push.server.minChunkSizeInMergedShuffleFile", "2m"))); } diff --git a/common/network-common/src/main/java/org/apache/spark/network/util/TransportFrameDecoder.java b/common/network-common/src/main/java/org/apache/spark/network/util/TransportFrameDecoder.java index cef0e415aa40a..6b490068507aa 100644 --- a/common/network-common/src/main/java/org/apache/spark/network/util/TransportFrameDecoder.java +++ b/common/network-common/src/main/java/org/apache/spark/network/util/TransportFrameDecoder.java @@ -20,7 +20,6 @@ import java.util.LinkedList; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import io.netty.buffer.ByteBuf; import io.netty.buffer.CompositeByteBuf; import io.netty.buffer.Unpooled; @@ -145,9 +144,9 @@ private ByteBuf decodeNext() { } if (frameBuf == null) { - Preconditions.checkArgument(frameSize < MAX_FRAME_SIZE, + JavaUtils.checkArgument(frameSize < MAX_FRAME_SIZE, "Too large frame: %s", frameSize); - Preconditions.checkArgument(frameSize > 0, + JavaUtils.checkArgument(frameSize > 0, "Frame length should be positive: %s", frameSize); frameRemainingBytes = (int) frameSize; @@ -252,7 +251,7 @@ public void handlerRemoved(ChannelHandlerContext ctx) throws Exception { } public void setInterceptor(Interceptor interceptor) { - Preconditions.checkState(this.interceptor == null, "Already have an interceptor."); + JavaUtils.checkState(this.interceptor == null, "Already have an interceptor."); this.interceptor = interceptor; } diff --git a/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchIntegrationSuite.java b/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchIntegrationSuite.java index 576a106934fda..75ccd8d5789d4 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchIntegrationSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchIntegrationSuite.java @@ -30,7 +30,6 @@ import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; -import com.google.common.collect.Sets; import com.google.common.io.Closeables; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; @@ -188,7 +187,7 @@ public void onFailure(int chunkIndex, Throwable e) { @Test public void fetchBufferChunk() throws Exception { FetchResult res = fetchChunks(Arrays.asList(BUFFER_CHUNK_INDEX)); - assertEquals(Sets.newHashSet(BUFFER_CHUNK_INDEX), res.successChunks); + assertEquals(Set.of(BUFFER_CHUNK_INDEX), res.successChunks); assertTrue(res.failedChunks.isEmpty()); assertBufferListsEqual(Arrays.asList(bufferChunk), res.buffers); res.releaseBuffers(); @@ -197,7 +196,7 @@ public void fetchBufferChunk() throws Exception { @Test public void fetchFileChunk() throws Exception { FetchResult res = fetchChunks(Arrays.asList(FILE_CHUNK_INDEX)); - assertEquals(Sets.newHashSet(FILE_CHUNK_INDEX), res.successChunks); + assertEquals(Set.of(FILE_CHUNK_INDEX), res.successChunks); assertTrue(res.failedChunks.isEmpty()); assertBufferListsEqual(Arrays.asList(fileChunk), res.buffers); res.releaseBuffers(); @@ -207,14 +206,14 @@ public void fetchFileChunk() throws Exception { public void fetchNonExistentChunk() throws Exception { FetchResult res = fetchChunks(Arrays.asList(12345)); assertTrue(res.successChunks.isEmpty()); - assertEquals(Sets.newHashSet(12345), 
res.failedChunks); + assertEquals(Set.of(12345), res.failedChunks); assertTrue(res.buffers.isEmpty()); } @Test public void fetchBothChunks() throws Exception { FetchResult res = fetchChunks(Arrays.asList(BUFFER_CHUNK_INDEX, FILE_CHUNK_INDEX)); - assertEquals(Sets.newHashSet(BUFFER_CHUNK_INDEX, FILE_CHUNK_INDEX), res.successChunks); + assertEquals(Set.of(BUFFER_CHUNK_INDEX, FILE_CHUNK_INDEX), res.successChunks); assertTrue(res.failedChunks.isEmpty()); assertBufferListsEqual(Arrays.asList(bufferChunk, fileChunk), res.buffers); res.releaseBuffers(); @@ -223,8 +222,8 @@ public void fetchBothChunks() throws Exception { @Test public void fetchChunkAndNonExistent() throws Exception { FetchResult res = fetchChunks(Arrays.asList(BUFFER_CHUNK_INDEX, 12345)); - assertEquals(Sets.newHashSet(BUFFER_CHUNK_INDEX), res.successChunks); - assertEquals(Sets.newHashSet(12345), res.failedChunks); + assertEquals(Set.of(BUFFER_CHUNK_INDEX), res.successChunks); + assertEquals(Set.of(12345), res.failedChunks); assertBufferListsEqual(Arrays.asList(bufferChunk), res.buffers); res.releaseBuffers(); } diff --git a/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchRequestHandlerSuite.java b/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchRequestHandlerSuite.java index 74dffd87dcf30..e9a4c355ebe48 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchRequestHandlerSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/ChunkFetchRequestHandlerSuite.java @@ -27,8 +27,6 @@ import static org.mockito.Mockito.*; -import org.apache.commons.lang3.tuple.ImmutablePair; -import org.apache.commons.lang3.tuple.Pair; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.client.TransportClient; import org.apache.spark.network.protocol.*; @@ -36,6 +34,7 @@ import org.apache.spark.network.server.NoOpRpcHandler; import org.apache.spark.network.server.OneForOneStreamManager; import org.apache.spark.network.server.RpcHandler; +import org.apache.spark.util.Pair; public class ChunkFetchRequestHandlerSuite { @@ -54,7 +53,7 @@ public void handleChunkFetchRequest() throws Exception { .thenAnswer(invocationOnMock0 -> { Object response = invocationOnMock0.getArguments()[0]; ExtendedChannelPromise channelFuture = new ExtendedChannelPromise(channel); - responseAndPromisePairs.add(ImmutablePair.of(response, channelFuture)); + responseAndPromisePairs.add(Pair.of(response, channelFuture)); return channelFuture; }); diff --git a/common/network-common/src/test/java/org/apache/spark/network/ProtocolSuite.java b/common/network-common/src/test/java/org/apache/spark/network/ProtocolSuite.java index 8c1299ebcd836..500d91868bbbd 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/ProtocolSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/ProtocolSuite.java @@ -19,7 +19,6 @@ import java.util.List; -import com.google.common.primitives.Ints; import io.netty.buffer.Unpooled; import io.netty.channel.ChannelHandlerContext; import io.netty.channel.FileRegion; @@ -44,6 +43,7 @@ import org.apache.spark.network.protocol.StreamRequest; import org.apache.spark.network.protocol.StreamResponse; import org.apache.spark.network.util.ByteArrayWritableChannel; +import org.apache.spark.network.util.JavaUtils; import org.apache.spark.network.util.NettyUtils; public class ProtocolSuite { @@ -115,7 +115,8 @@ private static class FileRegionEncoder extends MessageToMessageEncoder out) throws Exception { - 
ByteArrayWritableChannel channel = new ByteArrayWritableChannel(Ints.checkedCast(in.count())); + ByteArrayWritableChannel channel = + new ByteArrayWritableChannel(JavaUtils.checkedCast(in.count())); while (in.transferred() < in.count()) { in.transferTo(channel, in.transferred()); } diff --git a/common/network-common/src/test/java/org/apache/spark/network/RpcIntegrationSuite.java b/common/network-common/src/test/java/org/apache/spark/network/RpcIntegrationSuite.java index 40495d6912c91..e229e32e91717 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/RpcIntegrationSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/RpcIntegrationSuite.java @@ -24,10 +24,6 @@ import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; -import com.google.common.collect.Sets; -import com.google.common.io.Files; -import org.apache.commons.lang3.tuple.ImmutablePair; -import org.apache.commons.lang3.tuple.Pair; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -41,6 +37,7 @@ import org.apache.spark.network.util.JavaUtils; import org.apache.spark.network.util.MapConfigProvider; import org.apache.spark.network.util.TransportConf; +import org.apache.spark.util.Pair; public class RpcIntegrationSuite { static TransportConf conf; @@ -248,14 +245,14 @@ public void onFailure(Throwable e) { @Test public void singleRPC() throws Exception { RpcResult res = sendRPC("hello/Aaron"); - assertEquals(Sets.newHashSet("Hello, Aaron!"), res.successMessages); + assertEquals(Set.of("Hello, Aaron!"), res.successMessages); assertTrue(res.errorMessages.isEmpty()); } @Test public void doubleRPC() throws Exception { RpcResult res = sendRPC("hello/Aaron", "hello/Reynold"); - assertEquals(Sets.newHashSet("Hello, Aaron!", "Hello, Reynold!"), res.successMessages); + assertEquals(Set.of("Hello, Aaron!", "Hello, Reynold!"), res.successMessages); assertTrue(res.errorMessages.isEmpty()); } @@ -263,28 +260,28 @@ public void doubleRPC() throws Exception { public void returnErrorRPC() throws Exception { RpcResult res = sendRPC("return error/OK"); assertTrue(res.successMessages.isEmpty()); - assertErrorsContain(res.errorMessages, Sets.newHashSet("Returned: OK")); + assertErrorsContain(res.errorMessages, Set.of("Returned: OK")); } @Test public void throwErrorRPC() throws Exception { RpcResult res = sendRPC("throw error/uh-oh"); assertTrue(res.successMessages.isEmpty()); - assertErrorsContain(res.errorMessages, Sets.newHashSet("Thrown: uh-oh")); + assertErrorsContain(res.errorMessages, Set.of("Thrown: uh-oh")); } @Test public void doubleTrouble() throws Exception { RpcResult res = sendRPC("return error/OK", "throw error/uh-oh"); assertTrue(res.successMessages.isEmpty()); - assertErrorsContain(res.errorMessages, Sets.newHashSet("Returned: OK", "Thrown: uh-oh")); + assertErrorsContain(res.errorMessages, Set.of("Returned: OK", "Thrown: uh-oh")); } @Test public void sendSuccessAndFailure() throws Exception { RpcResult res = sendRPC("hello/Bob", "throw error/the", "hello/Builder", "return error/!"); - assertEquals(Sets.newHashSet("Hello, Bob!", "Hello, Builder!"), res.successMessages); - assertErrorsContain(res.errorMessages, Sets.newHashSet("Thrown: the", "Returned: !")); + assertEquals(Set.of("Hello, Bob!", "Hello, Builder!"), res.successMessages); + assertErrorsContain(res.errorMessages, Set.of("Thrown: the", "Returned: !")); } @Test @@ -311,7 +308,7 @@ public void sendRpcWithStreamOneAtATime() throws Exception { for (String 
stream : StreamTestHelper.STREAMS) { RpcResult res = sendRpcWithStream(stream); assertTrue(res.errorMessages.isEmpty(), "there were error messages!" + res.errorMessages); - assertEquals(Sets.newHashSet(stream), res.successMessages); + assertEquals(Set.of(stream), res.successMessages); } } @@ -322,7 +319,7 @@ public void sendRpcWithStreamConcurrently() throws Exception { streams[i] = StreamTestHelper.STREAMS[i % StreamTestHelper.STREAMS.length]; } RpcResult res = sendRpcWithStream(streams); - assertEquals(Sets.newHashSet(StreamTestHelper.STREAMS), res.successMessages); + assertEquals(Set.of(StreamTestHelper.STREAMS), res.successMessages); assertTrue(res.errorMessages.isEmpty()); } @@ -342,8 +339,8 @@ public void sendRpcWithStreamFailures() throws Exception { RpcResult exceptionInOnComplete = sendRpcWithStream("fail/exception-oncomplete/smallBuffer", "smallBuffer"); assertErrorsContain(exceptionInOnComplete.errorMessages, - Sets.newHashSet("Failure post-processing")); - assertEquals(Sets.newHashSet("smallBuffer"), exceptionInOnComplete.successMessages); + Set.of("Failure post-processing")); + assertEquals(Set.of("smallBuffer"), exceptionInOnComplete.successMessages); } private void assertErrorsContain(Set errors, Set contains) { @@ -365,14 +362,14 @@ private void assertErrorAndClosed(RpcResult result, String expectedError) { // We expect 1 additional error due to closed connection and here are possible keywords in the // error message. - Set possibleClosedErrors = Sets.newHashSet( + Set possibleClosedErrors = Set.of( "closed", "Connection reset", "java.nio.channels.ClosedChannelException", "io.netty.channel.StacklessClosedChannelException", "java.io.IOException: Broken pipe" ); - Set containsAndClosed = Sets.newHashSet(expectedError); + Set containsAndClosed = new HashSet<>(Set.of(expectedError)); containsAndClosed.addAll(possibleClosedErrors); Pair, Set> r = checkErrorsContain(errors, containsAndClosed); @@ -392,8 +389,8 @@ private void assertErrorAndClosed(RpcResult result, String expectedError) { private Pair, Set> checkErrorsContain( Set errors, Set contains) { - Set remainingErrors = Sets.newHashSet(errors); - Set notFound = Sets.newHashSet(); + Set remainingErrors = new HashSet<>(errors); + Set notFound = new HashSet<>(); for (String contain : contains) { Iterator it = remainingErrors.iterator(); boolean foundMatch = false; @@ -408,7 +405,7 @@ private Pair, Set> checkErrorsContain( notFound.add(contain); } } - return new ImmutablePair<>(remainingErrors, notFound); + return new Pair<>(remainingErrors, notFound); } private static class VerifyingStreamCallback implements StreamCallbackWithID { @@ -431,7 +428,8 @@ private static class VerifyingStreamCallback implements StreamCallbackWithID { void verify() throws IOException { if (streamId.equals("file")) { - assertTrue(Files.equal(testData.testFile, outFile), "File stream did not match."); + assertTrue(JavaUtils.contentEquals(testData.testFile, outFile), + "File stream did not match."); } else { byte[] result = ((ByteArrayOutputStream)out).toByteArray(); ByteBuffer srcBuffer = testData.srcBuffer(streamId); diff --git a/common/network-common/src/test/java/org/apache/spark/network/StreamSuite.java b/common/network-common/src/test/java/org/apache/spark/network/StreamSuite.java index 4f4637e302b94..496af96cb1cac 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/StreamSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/StreamSuite.java @@ -29,7 +29,6 @@ import 
java.util.concurrent.ExecutorService; import java.util.concurrent.TimeUnit; -import com.google.common.io.Files; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; import org.junit.jupiter.api.Test; @@ -43,6 +42,7 @@ import org.apache.spark.network.server.RpcHandler; import org.apache.spark.network.server.StreamManager; import org.apache.spark.network.server.TransportServer; +import org.apache.spark.network.util.JavaUtils; import org.apache.spark.network.util.MapConfigProvider; import org.apache.spark.network.util.TransportConf; @@ -212,7 +212,8 @@ public void run() { callback.waitForCompletion(timeoutMs); if (srcBuffer == null) { - assertTrue(Files.equal(testData.testFile, outFile), "File stream did not match."); + assertTrue(JavaUtils.contentEquals(testData.testFile, outFile), + "File stream did not match."); } else { ByteBuffer base; synchronized (srcBuffer) { diff --git a/common/network-common/src/test/java/org/apache/spark/network/TestManagedBuffer.java b/common/network-common/src/test/java/org/apache/spark/network/TestManagedBuffer.java index d1e93e3cb5845..828d995ba444b 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/TestManagedBuffer.java +++ b/common/network-common/src/test/java/org/apache/spark/network/TestManagedBuffer.java @@ -21,11 +21,11 @@ import java.io.InputStream; import java.nio.ByteBuffer; -import com.google.common.base.Preconditions; import io.netty.buffer.Unpooled; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.buffer.NettyManagedBuffer; +import org.apache.spark.network.util.JavaUtils; /** * A ManagedBuffer implementation that contains 0, 1, 2, 3, ..., (len-1). @@ -38,7 +38,7 @@ public class TestManagedBuffer extends ManagedBuffer { private NettyManagedBuffer underlying; public TestManagedBuffer(int len) { - Preconditions.checkArgument(len <= Byte.MAX_VALUE); + JavaUtils.checkArgument(len <= Byte.MAX_VALUE, "length exceeds limit " + Byte.MAX_VALUE); this.len = len; byte[] byteArray = new byte[len]; for (int i = 0; i < len; i ++) { diff --git a/common/network-common/src/test/java/org/apache/spark/network/TransportRequestHandlerSuite.java b/common/network-common/src/test/java/org/apache/spark/network/TransportRequestHandlerSuite.java index d643fb4f662e3..9433e274b507f 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/TransportRequestHandlerSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/TransportRequestHandlerSuite.java @@ -28,8 +28,6 @@ import static org.junit.jupiter.api.Assertions.*; import static org.mockito.Mockito.*; -import org.apache.commons.lang3.tuple.ImmutablePair; -import org.apache.commons.lang3.tuple.Pair; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.client.RpcResponseCallback; import org.apache.spark.network.client.TransportClient; @@ -39,6 +37,7 @@ import org.apache.spark.network.server.RpcHandler; import org.apache.spark.network.server.StreamManager; import org.apache.spark.network.server.TransportRequestHandler; +import org.apache.spark.util.Pair; public class TransportRequestHandlerSuite { @@ -53,7 +52,7 @@ public void handleStreamRequest() throws Exception { .thenAnswer(invocationOnMock0 -> { Object response = invocationOnMock0.getArguments()[0]; ExtendedChannelPromise channelFuture = new ExtendedChannelPromise(channel); - responseAndPromisePairs.add(ImmutablePair.of(response, channelFuture)); + responseAndPromisePairs.add(Pair.of(response, channelFuture)); return 
channelFuture; }); @@ -145,7 +144,7 @@ public MergedBlockMetaReqHandler getMergedBlockMetaReqHandler() { when(channel.writeAndFlush(any())).thenAnswer(invocationOnMock0 -> { Object response = invocationOnMock0.getArguments()[0]; ExtendedChannelPromise channelFuture = new ExtendedChannelPromise(channel); - responseAndPromisePairs.add(ImmutablePair.of(response, channelFuture)); + responseAndPromisePairs.add(Pair.of(response, channelFuture)); return channelFuture; }); diff --git a/common/network-common/src/test/java/org/apache/spark/network/client/TransportClientFactorySuite.java b/common/network-common/src/test/java/org/apache/spark/network/client/TransportClientFactorySuite.java index b57f0be920c7b..58faea2cf2dda 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/client/TransportClientFactorySuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/client/TransportClientFactorySuite.java @@ -225,11 +225,12 @@ public void closeFactoryBeforeCreateClient() { } @Test - public void fastFailConnectionInTimeWindow() { + public void fastFailConnectionInTimeWindow() throws IOException, InterruptedException { TransportClientFactory factory = context.createClientFactory(); TransportServer server = context.createServer(); int unreachablePort = server.getPort(); server.close(); + Thread.sleep(1000); Assertions.assertThrows(IOException.class, () -> factory.createClient(TestUtils.getLocalHost(), unreachablePort, true)); Assertions.assertThrows(IOException.class, @@ -258,6 +259,7 @@ public void unlimitedConnectionAndCreationTimeouts() throws IOException, Interru TransportServer server = ctx.createServer(); int unreachablePort = server.getPort(); JavaUtils.closeQuietly(server); + Thread.sleep(1000); IOException exception = Assertions.assertThrows(IOException.class, () -> factory.createClient(TestUtils.getLocalHost(), unreachablePort, true)); assertNotEquals(exception.getCause(), null); diff --git a/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java b/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java index 628de9e780337..904deabba5867 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthEngineSuite.java @@ -21,7 +21,6 @@ import java.security.GeneralSecurityException; import java.util.Map; -import com.google.common.collect.ImmutableMap; import com.google.crypto.tink.subtle.Hex; import org.apache.spark.network.util.*; @@ -49,7 +48,7 @@ abstract class AuthEngineSuite { static TransportConf getConf(int authEngineVerison, boolean useCtr) { String authEngineVersion = (authEngineVerison == 1) ? "1" : "2"; String mode = useCtr ? 
"AES/CTR/NoPadding" : "AES/GCM/NoPadding"; - Map confMap = ImmutableMap.of( + Map confMap = Map.of( "spark.network.crypto.enabled", "true", "spark.network.crypto.authEngineVersion", authEngineVersion, "spark.network.crypto.cipher", mode diff --git a/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthIntegrationSuite.java b/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthIntegrationSuite.java index cb5929f7c65b4..8d1773316878b 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthIntegrationSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/crypto/AuthIntegrationSuite.java @@ -22,7 +22,6 @@ import java.util.List; import java.util.Map; -import com.google.common.collect.ImmutableMap; import io.netty.channel.Channel; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Test; @@ -214,7 +213,7 @@ private static class AuthTestCtx { } AuthTestCtx(RpcHandler rpcHandler, String mode) throws Exception { - Map testConf = ImmutableMap.of( + Map testConf = Map.of( "spark.network.crypto.enabled", "true", "spark.network.crypto.cipher", mode); this.conf = new TransportConf("rpc", new MapConfigProvider(testConf)); diff --git a/common/network-common/src/test/java/org/apache/spark/network/protocol/MergedBlockMetaSuccessSuite.java b/common/network-common/src/test/java/org/apache/spark/network/protocol/MergedBlockMetaSuccessSuite.java index a3750ce11172b..41b84f3895876 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/protocol/MergedBlockMetaSuccessSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/protocol/MergedBlockMetaSuccessSuite.java @@ -21,9 +21,9 @@ import java.io.File; import java.io.FileOutputStream; import java.nio.file.Files; +import java.util.ArrayList; import java.util.List; -import com.google.common.collect.Lists; import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufAllocator; import io.netty.buffer.Unpooled; @@ -65,7 +65,7 @@ public void testMergedBlocksMetaEncodeDecode() throws Exception { MergedBlockMetaSuccess expectedMeta = new MergedBlockMetaSuccess(requestId, 2, new FileSegmentManagedBuffer(conf, chunkMetaFile, 0, chunkMetaFile.length())); - List out = Lists.newArrayList(); + List out = new ArrayList<>(); ChannelHandlerContext context = mock(ChannelHandlerContext.class); when(context.alloc()).thenReturn(ByteBufAllocator.DEFAULT); diff --git a/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java b/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java index bf0424a1506a3..4feaf5cef3f2b 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/sasl/SparkSaslSuite.java @@ -23,6 +23,7 @@ import java.io.File; import java.lang.reflect.Method; import java.nio.ByteBuffer; +import java.nio.file.Files; import java.util.ArrayList; import java.util.Arrays; import java.util.Collections; @@ -36,8 +37,6 @@ import javax.security.sasl.SaslException; import com.google.common.collect.ImmutableMap; -import com.google.common.io.ByteStreams; -import com.google.common.io.Files; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import io.netty.channel.Channel; @@ -221,7 +220,7 @@ public void testEncryptedMessageChunking() throws Exception { byte[] data = new byte[8 * 1024]; new Random().nextBytes(data); - Files.write(data, file); + 
Files.write(file.toPath(), data); SaslEncryptionBackend backend = mock(SaslEncryptionBackend.class); // It doesn't really matter what we return here, as long as it's not null. @@ -245,7 +244,7 @@ public void testEncryptedMessageChunking() throws Exception { @Test public void testFileRegionEncryption() throws Exception { - Map testConf = ImmutableMap.of( + Map testConf = Map.of( "spark.network.sasl.maxEncryptedBlockSize", "1k"); AtomicReference response = new AtomicReference<>(); @@ -262,7 +261,7 @@ public void testFileRegionEncryption() throws Exception { byte[] data = new byte[8 * 1024]; new Random().nextBytes(data); - Files.write(data, file); + Files.write(file.toPath(), data); ctx = new SaslTestCtx(rpcHandler, true, false, testConf); @@ -282,7 +281,7 @@ public void testFileRegionEncryption() throws Exception { verify(callback, times(1)).onSuccess(anyInt(), any(ManagedBuffer.class)); verify(callback, never()).onFailure(anyInt(), any(Throwable.class)); - byte[] received = ByteStreams.toByteArray(response.get().createInputStream()); + byte[] received = response.get().createInputStream().readAllBytes(); assertArrayEquals(data, received); } finally { file.delete(); @@ -299,7 +298,7 @@ public void testFileRegionEncryption() throws Exception { public void testServerAlwaysEncrypt() { Exception re = assertThrows(Exception.class, () -> new SaslTestCtx(mock(RpcHandler.class), false, false, - ImmutableMap.of("spark.network.sasl.serverAlwaysEncrypt", "true"))); + Map.of("spark.network.sasl.serverAlwaysEncrypt", "true"))); assertTrue(re.getCause() instanceof SaslException); } diff --git a/common/network-common/src/test/java/org/apache/spark/network/util/CryptoUtilsSuite.java b/common/network-common/src/test/java/org/apache/spark/network/util/CryptoUtilsSuite.java index 47c9ef2490d2e..9673bbfbc2de7 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/util/CryptoUtilsSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/util/CryptoUtilsSuite.java @@ -20,7 +20,6 @@ import java.util.Map; import java.util.Properties; -import com.google.common.collect.ImmutableMap; import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.*; @@ -38,7 +37,7 @@ public void testConfConversion() { String confVal2 = "val2"; String cryptoKey2 = CryptoUtils.COMMONS_CRYPTO_CONFIG_PREFIX + "A.b.c"; - Map conf = ImmutableMap.of( + Map conf = Map.of( confKey1, confVal1, confKey2, confVal2); diff --git a/common/network-common/src/test/java/org/apache/spark/network/util/DBProviderSuite.java b/common/network-common/src/test/java/org/apache/spark/network/util/DBProviderSuite.java index 81bfc55264c4c..f7299e157674e 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/util/DBProviderSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/util/DBProviderSuite.java @@ -18,7 +18,6 @@ package org.apache.spark.network.util; import com.fasterxml.jackson.databind.ObjectMapper; -import org.apache.commons.lang3.SystemUtils; import org.apache.spark.network.shuffledb.DBBackend; import org.apache.spark.network.shuffledb.StoreVersion; import org.junit.jupiter.api.Assertions; @@ -38,7 +37,7 @@ public void testRockDBCheckVersionFailed() throws IOException, InterruptedExcept @Test public void testLevelDBCheckVersionFailed() throws IOException, InterruptedException { - assumeFalse(SystemUtils.IS_OS_MAC_OSX && SystemUtils.OS_ARCH.equals("aarch64")); + assumeFalse(JavaUtils.isMacOnAppleSilicon); testCheckVersionFailed(DBBackend.LEVELDB, "leveldb"); 
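
Editor's note on the SparkSaslSuite hunks above: the patch swaps Guava's Files.write(byte[], File) and ByteStreams.toByteArray for plain JDK I/O. A minimal, self-contained sketch of the same round trip follows; the file name and buffer size here are invented for illustration and are not taken from the suite.

import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.Arrays;
import java.util.Random;

public class NioRoundTripSketch {
  public static void main(String[] args) throws Exception {
    Path file = Files.createTempFile("sasl-roundtrip", ".bin"); // hypothetical temp file
    byte[] data = new byte[8 * 1024];
    new Random().nextBytes(data);

    // Guava Files.write(data, file) becomes java.nio.file.Files.write(path, bytes).
    Files.write(file, data);

    // Guava ByteStreams.toByteArray(in) becomes InputStream#readAllBytes (JDK 9+).
    byte[] received;
    try (InputStream in = Files.newInputStream(file)) {
      received = in.readAllBytes();
    }
    System.out.println(Arrays.equals(data, received)); // true
    Files.delete(file);
  }
}
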
} diff --git a/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java b/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java index 1336a587fd2eb..2edeb3f05c9b0 100644 --- a/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java +++ b/common/network-common/src/test/java/org/apache/spark/network/util/JavaUtilsSuite.java @@ -18,6 +18,7 @@ import java.io.File; import java.io.IOException; +import java.nio.file.Files; import org.junit.jupiter.api.Test; @@ -55,4 +56,33 @@ public void testCreateDirectory() throws IOException { () -> JavaUtils.createDirectory(testDirPath, "scenario4")); assertTrue(testDir.setWritable(true)); } + + @Test + public void testListFiles() throws IOException { + File tmp = Files.createTempDirectory("testListFiles").toFile(); + File file = new File(tmp, "file"); + + // Return empty set on non-existent input + assertFalse(file.exists()); + assertEquals(0, JavaUtils.listFiles(file).size()); + assertEquals(0, JavaUtils.listPaths(file).size()); + + // Return empty set on non-directory input + file.createNewFile(); + assertTrue(file.exists()); + assertEquals(0, JavaUtils.listFiles(file).size()); + assertEquals(0, JavaUtils.listPaths(file).size()); + + // Return the entries of a directory input + File dir = new File(tmp, "dir"); + dir.mkdir(); + new File(dir, "1").createNewFile(); + assertEquals(1, JavaUtils.listFiles(dir).size()); + assertEquals(1, JavaUtils.listPaths(dir).size()); + + File symlink = new File(tmp, "symlink"); + Files.createSymbolicLink(symlink.toPath(), dir.toPath()); + assertEquals(1, JavaUtils.listFiles(symlink).size()); + assertEquals(1, JavaUtils.listPaths(symlink).size()); + } } diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index adfc55d28c357..60ad971573997 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -42,6 +42,11 @@ ${project.version} + + org.apache.commons + commons-lang3 + + io.dropwizard.metrics metrics-core diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/sasl/ShuffleSecretManager.java b/common/network-shuffle/src/main/java/org/apache/spark/network/sasl/ShuffleSecretManager.java index d67f2a3099d35..625cb2e1257da 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/sasl/ShuffleSecretManager.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/sasl/ShuffleSecretManager.java @@ -55,7 +55,7 @@ public void registerApp(String appId, String shuffleSecret) { // to the applicationId since the secrets change between application attempts on yarn.
shuffleSecretMap.put(appId, shuffleSecret); logger.info("Registered shuffle secret for application {}", - MDC.of(LogKeys.APP_ID$.MODULE$, appId)); + MDC.of(LogKeys.APP_ID, appId)); } /** @@ -72,7 +72,7 @@ public void registerApp(String appId, ByteBuffer shuffleSecret) { public void unregisterApp(String appId) { shuffleSecretMap.remove(appId); logger.info("Unregistered shuffle secret for application {}", - MDC.of(LogKeys.APP_ID$.MODULE$, appId)); + MDC.of(LogKeys.APP_ID, appId)); } /** diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/AppsWithRecoveryDisabled.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/AppsWithRecoveryDisabled.java index 6a029a1083a47..7a0b316a3a8ea 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/AppsWithRecoveryDisabled.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/AppsWithRecoveryDisabled.java @@ -18,11 +18,10 @@ package org.apache.spark.network.shuffle; import java.util.Collections; +import java.util.Objects; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; -import com.google.common.base.Preconditions; - /** * Stores the applications which have recovery disabled. */ @@ -41,8 +40,7 @@ private AppsWithRecoveryDisabled() { * @param appId application id */ public static void disableRecoveryOfApp(String appId) { - Preconditions.checkNotNull(appId); - INSTANCE.appsWithRecoveryDisabled.add(appId); + INSTANCE.appsWithRecoveryDisabled.add(Objects.requireNonNull(appId)); } /** @@ -51,8 +49,7 @@ public static void disableRecoveryOfApp(String appId) { * @return true if the application is enabled for recovery; false otherwise. */ public static boolean isRecoveryEnabledForApp(String appId) { - Preconditions.checkNotNull(appId); - return !INSTANCE.appsWithRecoveryDisabled.contains(appId); + return !INSTANCE.appsWithRecoveryDisabled.contains(Objects.requireNonNull(appId)); } /** @@ -60,7 +57,6 @@ public static boolean isRecoveryEnabledForApp(String appId) { * @param appId application id */ public static void removeApp(String appId) { - Preconditions.checkNotNull(appId); - INSTANCE.appsWithRecoveryDisabled.remove(appId); + INSTANCE.appsWithRecoveryDisabled.remove(Objects.requireNonNull(appId)); } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java index dcb0a52b0d66c..ceb5d64699744 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/BlockStoreClient.java @@ -173,7 +173,7 @@ public void onSuccess(ByteBuffer response) { ((LocalDirsForExecutors) msgObj).getLocalDirsByExec()); } catch (Throwable t) { logger.warn("Error while trying to get the host local dirs for {}", t.getCause(), - MDC.of(LogKeys.EXECUTOR_IDS$.MODULE$, Arrays.toString(getLocalDirsMessage.execIds))); + MDC.of(LogKeys.EXECUTOR_IDS, Arrays.toString(getLocalDirsMessage.execIds))); hostLocalDirsCompletable.completeExceptionally(t); } } @@ -181,7 +181,7 @@ public void onSuccess(ByteBuffer response) { @Override public void onFailure(Throwable t) { logger.warn("Error while trying to get the host local dirs for {}", t.getCause(), - MDC.of(LogKeys.EXECUTOR_IDS$.MODULE$, Arrays.toString(getLocalDirsMessage.execIds))); + MDC.of(LogKeys.EXECUTOR_IDS, Arrays.toString(getLocalDirsMessage.execIds))); 
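
Editor's note on the AppsWithRecoveryDisabled hunk above: a Preconditions.checkNotNull(x) followed by a separate use of x collapses into Objects.requireNonNull(x), which returns its argument. The stripped-down registry below only mirrors the shape of that change; the class and field names are placeholders, not the real ones.

import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;

// Illustrative only: not the actual AppsWithRecoveryDisabled class.
final class RecoveryDisabledAppsSketch {
  private static final Set<String> APPS = ConcurrentHashMap.newKeySet();

  static void disableRecoveryOfApp(String appId) {
    // Before: Preconditions.checkNotNull(appId); APPS.add(appId);
    // After: requireNonNull returns its argument, so the check folds into the call.
    APPS.add(Objects.requireNonNull(appId));
  }

  static boolean isRecoveryEnabledForApp(String appId) {
    return !APPS.contains(Objects.requireNonNull(appId));
  }
}
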
hostLocalDirsCompletable.completeExceptionally(t); } }); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java index 31ed10ad76f8f..298611cc8567f 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ErrorHandler.java @@ -20,10 +20,9 @@ import java.io.FileNotFoundException; import java.net.ConnectException; -import com.google.common.base.Throwables; - import org.apache.spark.annotation.Evolving; import org.apache.spark.network.server.BlockPushNonFatalFailure; +import org.apache.spark.network.util.JavaUtils; /** * Plugs into {@link RetryingBlockTransferor} to further control when an exception should be retried @@ -105,12 +104,12 @@ class BlockFetchErrorHandler implements ErrorHandler { @Override public boolean shouldRetryError(Throwable t) { - return !Throwables.getStackTraceAsString(t).contains(STALE_SHUFFLE_BLOCK_FETCH); + return !JavaUtils.stackTraceToString(t).contains(STALE_SHUFFLE_BLOCK_FETCH); } @Override public boolean shouldLogError(Throwable t) { - return !Throwables.getStackTraceAsString(t).contains(STALE_SHUFFLE_BLOCK_FETCH); + return !JavaUtils.stackTraceToString(t).contains(STALE_SHUFFLE_BLOCK_FETCH); } } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java index 5d33bfb345a9e..45d0ff69de900 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockHandler.java @@ -21,8 +21,10 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.util.HashMap; +import java.util.HashSet; import java.util.Iterator; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.concurrent.TimeUnit; import java.util.function.Function; @@ -35,8 +37,6 @@ import com.codahale.metrics.Timer; import com.codahale.metrics.Counter; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Sets; import org.apache.spark.internal.SparkLogger; import org.apache.spark.internal.SparkLoggerFactory; @@ -199,7 +199,7 @@ protected void handleMessage( } else if (msgObj instanceof GetLocalDirsForExecutors msg) { checkAuth(client, msg.appId); - Set execIdsForBlockResolver = Sets.newHashSet(msg.execIds); + Set execIdsForBlockResolver = new HashSet<>(Set.of(msg.execIds)); boolean fetchMergedBlockDirs = execIdsForBlockResolver.remove(SHUFFLE_MERGER_IDENTIFIER); Map localDirs = blockManager.getLocalDirs(msg.appId, execIdsForBlockResolver); @@ -224,9 +224,9 @@ protected void handleMessage( } else if (msgObj instanceof RemoveShuffleMerge msg) { checkAuth(client, msg.appId); logger.info("Removing shuffle merge data for application {} shuffle {} shuffleMerge {}", - MDC.of(LogKeys.APP_ID$.MODULE$, msg.appId), - MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, msg.shuffleId), - MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, msg.shuffleMergeId)); + MDC.of(LogKeys.APP_ID, msg.appId), + MDC.of(LogKeys.SHUFFLE_ID, msg.shuffleId), + MDC.of(LogKeys.SHUFFLE_MERGE_ID, msg.shuffleMergeId)); mergeManager.removeShuffleMerge(msg); } else if (msgObj instanceof DiagnoseCorruption msg) { 
checkAuth(client, msg.appId); @@ -585,7 +585,7 @@ public boolean hasNext() { @Override public ManagedBuffer next() { - ManagedBuffer block = Preconditions.checkNotNull(mergeManager.getMergedBlockData( + ManagedBuffer block = Objects.requireNonNull(mergeManager.getMergedBlockData( appId, shuffleId, shuffleMergeId, reduceIds[reduceIdx], chunkIds[reduceIdx][chunkIdx])); if (chunkIdx < chunkIds[reduceIdx].length - 1) { chunkIdx += 1; diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java index 97723f77723d4..4fdd39c3471fc 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalBlockStoreClient.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.nio.ByteBuffer; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -27,7 +28,6 @@ import java.util.concurrent.Future; import com.codahale.metrics.MetricSet; -import com.google.common.collect.Lists; import org.apache.spark.internal.LogKeys; import org.apache.spark.internal.MDC; @@ -82,7 +82,7 @@ public void init(String appId) { this.appId = appId; TransportContext context = new TransportContext( transportConf, new NoOpRpcHandler(), true, true); - List bootstraps = Lists.newArrayList(); + List bootstraps = new ArrayList<>(); if (authEnabled) { bootstraps.add(new AuthClientBootstrap(transportConf, appId, secretKeyHolder)); } @@ -106,7 +106,7 @@ private void setComparableAppAttemptId(String appAttemptId) { } catch (NumberFormatException e) { logger.warn("Push based shuffle requires comparable application attemptId, " + "but the appAttemptId {} cannot be parsed to Integer", e, - MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, appAttemptId)); + MDC.of(LogKeys.APP_ATTEMPT_ID, appAttemptId)); } } @@ -221,8 +221,8 @@ public void onFailure(Throwable e) { }); } catch (Exception e) { logger.error("Exception while sending finalizeShuffleMerge request to {}:{}", e, - MDC.of(LogKeys.HOST$.MODULE$, host), - MDC.of(LogKeys.PORT$.MODULE$, port)); + MDC.of(LogKeys.HOST, host), + MDC.of(LogKeys.PORT, port)); listener.onShuffleMergeFailure(e); } } @@ -322,8 +322,8 @@ public void onSuccess(ByteBuffer response) { } catch (Throwable t) { logger.warn("Error trying to remove blocks {} via external shuffle service from " + "executor: {}", t, - MDC.of(LogKeys.BLOCK_IDS$.MODULE$, Arrays.toString(blockIds)), - MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, execId)); + MDC.of(LogKeys.BLOCK_IDS, Arrays.toString(blockIds)), + MDC.of(LogKeys.EXECUTOR_ID, execId)); numRemovedBlocksFuture.complete(0); } } @@ -331,8 +331,8 @@ public void onSuccess(ByteBuffer response) { @Override public void onFailure(Throwable e) { logger.warn("Error trying to remove blocks {} via external shuffle service from " + - "executor: {}", e, MDC.of(LogKeys.BLOCK_IDS$.MODULE$, Arrays.toString(blockIds)), - MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, execId)); + "executor: {}", e, MDC.of(LogKeys.BLOCK_IDS, Arrays.toString(blockIds)), + MDC.of(LogKeys.EXECUTOR_ID, execId)); numRemovedBlocksFuture.complete(0); } }); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java index e43eedd8b25eb..b3002833fce1a 100644 --- 
a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolver.java @@ -20,15 +20,13 @@ import java.io.*; import java.nio.charset.StandardCharsets; import java.util.*; +import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ExecutionException; import java.util.concurrent.Executor; import java.util.concurrent.Executors; import java.util.stream.Collectors; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; -import org.apache.commons.lang3.tuple.Pair; import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import com.fasterxml.jackson.databind.ObjectMapper; @@ -37,7 +35,6 @@ import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; import com.google.common.cache.Weigher; -import com.google.common.collect.Maps; import org.apache.spark.internal.SparkLogger; import org.apache.spark.internal.SparkLoggerFactory; @@ -56,6 +53,7 @@ import org.apache.spark.network.util.JavaUtils; import org.apache.spark.network.util.NettyUtils; import org.apache.spark.network.util.TransportConf; +import org.apache.spark.util.Pair; /** * Manages converting shuffle BlockIds into physical segments of local files, from a process outside @@ -134,11 +132,11 @@ public ShuffleIndexInformation load(String filePath) throws IOException { db = DBProvider.initDB(dbBackend, this.registeredExecutorFile, CURRENT_VERSION, mapper); if (db != null) { logger.info("Use {} as the implementation of {}", - MDC.of(LogKeys.SHUFFLE_DB_BACKEND_NAME$.MODULE$, dbBackend), - MDC.of(LogKeys.SHUFFLE_DB_BACKEND_KEY$.MODULE$, Constants.SHUFFLE_SERVICE_DB_BACKEND)); + MDC.of(LogKeys.SHUFFLE_DB_BACKEND_NAME, dbBackend), + MDC.of(LogKeys.SHUFFLE_DB_BACKEND_KEY, Constants.SHUFFLE_SERVICE_DB_BACKEND)); executors = reloadRegisteredExecutors(db); } else { - executors = Maps.newConcurrentMap(); + executors = new ConcurrentHashMap<>(); } this.directoryCleaner = directoryCleaner; } @@ -154,8 +152,8 @@ public void registerExecutor( ExecutorShuffleInfo executorInfo) { AppExecId fullId = new AppExecId(appId, execId); logger.info("Registered executor {} with {}", - MDC.of(LogKeys.APP_EXECUTOR_ID$.MODULE$, fullId), - MDC.of(LogKeys.EXECUTOR_SHUFFLE_INFO$.MODULE$, executorInfo)); + MDC.of(LogKeys.APP_EXECUTOR_ID, fullId), + MDC.of(LogKeys.EXECUTOR_SHUFFLE_INFO, executorInfo)); try { if (db != null && AppsWithRecoveryDisabled.isRecoveryEnabledForApp(appId)) { byte[] key = dbAppExecKey(fullId); @@ -221,8 +219,8 @@ public ManagedBuffer getRddBlockData( */ public void applicationRemoved(String appId, boolean cleanupLocalDirs) { logger.info("Application {} removed, cleanupLocalDirs = {}", - MDC.of(LogKeys.APP_ID$.MODULE$, appId), - MDC.of(LogKeys.CLEANUP_LOCAL_DIRS$.MODULE$, cleanupLocalDirs)); + MDC.of(LogKeys.APP_ID, appId), + MDC.of(LogKeys.CLEANUP_LOCAL_DIRS, cleanupLocalDirs)); Iterator> it = executors.entrySet().iterator(); while (it.hasNext()) { Map.Entry entry = it.next(); @@ -237,14 +235,14 @@ public void applicationRemoved(String appId, boolean cleanupLocalDirs) { db.delete(dbAppExecKey(fullId)); } catch (IOException e) { logger.error("Error deleting {} from executor state db", e, - MDC.of(LogKeys.APP_ID$.MODULE$, appId)); + MDC.of(LogKeys.APP_ID, appId)); } } if (cleanupLocalDirs) { logger.info("Cleaning up 
executor {}'s {} local dirs", - MDC.of(LogKeys.APP_EXECUTOR_ID$.MODULE$, fullId), - MDC.of(LogKeys.NUM_LOCAL_DIRS$.MODULE$, executor.localDirs.length)); + MDC.of(LogKeys.APP_EXECUTOR_ID, fullId), + MDC.of(LogKeys.NUM_LOCAL_DIRS, executor.localDirs.length)); // Execute the actual deletion in a different thread, as it may take some time. directoryCleaner.execute(() -> deleteExecutorDirs(executor.localDirs)); @@ -259,18 +257,18 @@ public void applicationRemoved(String appId, boolean cleanupLocalDirs) { */ public void executorRemoved(String executorId, String appId) { logger.info("Clean up non-shuffle and non-RDD files associated with the finished executor {}", - MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, executorId)); + MDC.of(LogKeys.EXECUTOR_ID, executorId)); AppExecId fullId = new AppExecId(appId, executorId); final ExecutorShuffleInfo executor = executors.get(fullId); if (executor == null) { // Executor not registered, skip clean up of the local directories. logger.info("Executor is not registered (appId={}, execId={})", - MDC.of(LogKeys.APP_ID$.MODULE$, appId), - MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, executorId)); + MDC.of(LogKeys.APP_ID, appId), + MDC.of(LogKeys.EXECUTOR_ID, executorId)); } else { logger.info("Cleaning up non-shuffle and non-RDD files in executor {}'s {} local dirs", - MDC.of(LogKeys.APP_EXECUTOR_ID$.MODULE$, fullId), - MDC.of(LogKeys.NUM_LOCAL_DIRS$.MODULE$, executor.localDirs.length)); + MDC.of(LogKeys.APP_EXECUTOR_ID, fullId), + MDC.of(LogKeys.NUM_LOCAL_DIRS, executor.localDirs.length)); // Execute the actual deletion in a different thread, as it may take some time. directoryCleaner.execute(() -> deleteNonShuffleServiceServedFiles(executor.localDirs)); @@ -288,7 +286,7 @@ private void deleteExecutorDirs(String[] dirs) { logger.debug("Successfully cleaned up directory: {}", localDir); } catch (Exception e) { logger.error("Failed to delete directory: {}", e, - MDC.of(LogKeys.PATH$.MODULE$, localDir)); + MDC.of(LogKeys.PATH, localDir)); } } } @@ -311,7 +309,7 @@ private void deleteNonShuffleServiceServedFiles(String[] dirs) { localDir); } catch (Exception e) { logger.error("Failed to delete files not served by shuffle service in directory: {}", e, - MDC.of(LogKeys.PATH$.MODULE$, localDir)); + MDC.of(LogKeys.PATH, localDir)); } } } @@ -384,7 +382,7 @@ public int removeBlocks(String appId, String execId, String[] blockIds) { numRemovedBlocks++; } else { logger.warn("Failed to delete block: {}", - MDC.of(LogKeys.PATH$.MODULE$, file.getAbsolutePath())); + MDC.of(LogKeys.PATH, file.getAbsolutePath())); } } return numRemovedBlocks; @@ -400,7 +398,7 @@ public Map getLocalDirs(String appId, Set execIds) { } return Pair.of(exec, info.localDirs); }) - .collect(Collectors.toMap(Pair::getKey, Pair::getValue)); + .collect(Collectors.toMap(Pair::getLeft, Pair::getRight)); } /** @@ -451,10 +449,7 @@ public int hashCode() { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("appId", appId) - .append("execId", execId) - .toString(); + return "ExternalShuffleBlockResolver[appId=" + appId + ",execId=" + execId + "]"; } } @@ -477,7 +472,7 @@ private static AppExecId parseDbAppExecKey(String s) throws IOException { @VisibleForTesting static ConcurrentMap reloadRegisteredExecutors(DB db) throws IOException { - ConcurrentMap registeredExecutors = Maps.newConcurrentMap(); + ConcurrentMap registeredExecutors = new ConcurrentHashMap<>(); if (db != null) { try (DBIterator itr = db.iterator()) { 
itr.seek(APP_KEY_PREFIX.getBytes(StandardCharsets.UTF_8)); @@ -489,7 +484,7 @@ static ConcurrentMap reloadRegisteredExecutors(D } AppExecId id = parseDbAppExecKey(key); logger.info("Reloading registered executors: {}", - MDC.of(LogKeys.APP_EXECUTOR_ID$.MODULE$, id)); + MDC.of(LogKeys.APP_EXECUTOR_ID, id)); ExecutorShuffleInfo shuffleInfo = mapper.readValue(e.getValue(), ExecutorShuffleInfo.class); registeredExecutors.put(id, shuffleInfo); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedBlockMeta.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedBlockMeta.java index 5541b7460ac96..ca8d9bbe65500 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedBlockMeta.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/MergedBlockMeta.java @@ -20,8 +20,8 @@ import java.io.IOException; import java.util.ArrayList; import java.util.List; +import java.util.Objects; -import com.google.common.base.Preconditions; import io.netty.buffer.ByteBuf; import io.netty.buffer.Unpooled; import org.roaringbitmap.RoaringBitmap; @@ -43,7 +43,7 @@ public class MergedBlockMeta { public MergedBlockMeta(int numChunks, ManagedBuffer chunksBitmapBuffer) { this.numChunks = numChunks; - this.chunksBitmapBuffer = Preconditions.checkNotNull(chunksBitmapBuffer); + this.chunksBitmapBuffer = Objects.requireNonNull(chunksBitmapBuffer); } public int getNumChunks() { diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockPusher.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockPusher.java index d90ca1a88a267..05158a6600d0d 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockPusher.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/OneForOneBlockPusher.java @@ -21,8 +21,6 @@ import java.util.Arrays; import java.util.Map; -import com.google.common.base.Preconditions; - import org.apache.spark.internal.SparkLogger; import org.apache.spark.internal.SparkLoggerFactory; import org.apache.spark.network.buffer.ManagedBuffer; @@ -34,6 +32,7 @@ import org.apache.spark.network.shuffle.protocol.BlockPushReturnCode; import org.apache.spark.network.shuffle.protocol.BlockTransferMessage; import org.apache.spark.network.shuffle.protocol.PushBlockStream; +import org.apache.spark.network.util.JavaUtils; /** * Similar to {@link OneForOneBlockFetcher}, but for pushing blocks to remote shuffle service to @@ -90,7 +89,7 @@ public void onSuccess(ByteBuffer response) { ReturnCode returnCode = BlockPushNonFatalFailure.getReturnCode(pushResponse.returnCode); if (returnCode != ReturnCode.SUCCESS) { String blockId = pushResponse.failureBlockId; - Preconditions.checkArgument(!blockId.isEmpty()); + JavaUtils.checkArgument(!blockId.isEmpty(), "BlockID should not be empty"); checkAndFailRemainingBlocks(index, new BlockPushNonFatalFailure(returnCode, BlockPushNonFatalFailure.getErrorMsg(blockId, returnCode))); } else { diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java index 6e9bd548f5327..a48208bad5b8c 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RemoteBlockPushResolver.java @@ 
-55,7 +55,6 @@ import com.codahale.metrics.Metric; import com.codahale.metrics.MetricSet; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; import com.google.common.cache.CacheBuilder; import com.google.common.cache.CacheLoader; import com.google.common.cache.LoadingCache; @@ -187,8 +186,8 @@ public ShuffleIndexInformation load(String filePath) throws IOException { db = DBProvider.initDB(dbBackend, this.recoveryFile, CURRENT_VERSION, mapper); if (db != null) { logger.info("Use {} as the implementation of {}", - MDC.of(LogKeys.SHUFFLE_DB_BACKEND_NAME$.MODULE$, dbBackend), - MDC.of(LogKeys.SHUFFLE_DB_BACKEND_KEY$.MODULE$, Constants.SHUFFLE_SERVICE_DB_BACKEND)); + MDC.of(LogKeys.SHUFFLE_DB_BACKEND_NAME, dbBackend), + MDC.of(LogKeys.SHUFFLE_DB_BACKEND_KEY, Constants.SHUFFLE_SERVICE_DB_BACKEND)); reloadAndCleanUpAppShuffleInfo(db); } this.pushMergeMetrics = new PushMergeMetrics(); @@ -211,7 +210,7 @@ public boolean shouldLogError(Throwable t) { protected AppShuffleInfo validateAndGetAppShuffleInfo(String appId) { // TODO: [SPARK-33236] Change the message when this service is able to handle NM restart AppShuffleInfo appShuffleInfo = appsShuffleInfo.get(appId); - Preconditions.checkArgument(appShuffleInfo != null, + JavaUtils.checkArgument(appShuffleInfo != null, "application " + appId + " is not registered or NM was restarted."); return appShuffleInfo; } @@ -234,10 +233,10 @@ AppShufflePartitionInfo getOrCreateAppShufflePartitionInfo( if (mergePartitionsInfo == null) { logger.info("{} attempt {} shuffle {} shuffleMerge {}: creating a new shuffle " + "merge metadata", - MDC.of(LogKeys.APP_ID$.MODULE$, appShuffleInfo.appId), - MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, appShuffleInfo.attemptId), - MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, shuffleId), - MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, shuffleMergeId)); + MDC.of(LogKeys.APP_ID, appShuffleInfo.appId), + MDC.of(LogKeys.APP_ATTEMPT_ID, appShuffleInfo.attemptId), + MDC.of(LogKeys.SHUFFLE_ID, shuffleId), + MDC.of(LogKeys.SHUFFLE_MERGE_ID, shuffleMergeId)); return new AppShuffleMergePartitionsInfo(shuffleMergeId, false); } else { int latestShuffleMergeId = mergePartitionsInfo.shuffleMergeId; @@ -256,10 +255,10 @@ AppShufflePartitionInfo getOrCreateAppShufflePartitionInfo( shuffleId, latestShuffleMergeId); logger.info("{}: creating a new shuffle merge metadata since received " + "shuffleMergeId {} is higher than latest shuffleMergeId {}", - MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, + MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID, currentAppAttemptShuffleMergeId), - MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, shuffleMergeId), - MDC.of(LogKeys.LATEST_SHUFFLE_MERGE_ID$.MODULE$, latestShuffleMergeId)); + MDC.of(LogKeys.SHUFFLE_MERGE_ID, shuffleMergeId), + MDC.of(LogKeys.LATEST_SHUFFLE_MERGE_ID, latestShuffleMergeId)); submitCleanupTask(() -> closeAndDeleteOutdatedPartitions(currentAppAttemptShuffleMergeId, mergePartitionsInfo.shuffleMergePartitions)); @@ -293,13 +292,13 @@ AppShufflePartitionInfo getOrCreateAppShufflePartitionInfo( } catch (IOException e) { logger.error("{} attempt {} shuffle {} shuffleMerge {}: cannot create merged shuffle " + "partition with data file {}, index file {}, and meta file {}", - MDC.of(LogKeys.APP_ID$.MODULE$, appShuffleInfo.appId), - MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, appShuffleInfo.attemptId), - MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, shuffleId), - MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, shuffleMergeId), - MDC.of(LogKeys.DATA_FILE$.MODULE$, 
dataFile.getAbsolutePath()), - MDC.of(LogKeys.INDEX_FILE$.MODULE$, indexFile.getAbsolutePath()), - MDC.of(LogKeys.META_FILE$.MODULE$, metaFile.getAbsolutePath())); + MDC.of(LogKeys.APP_ID, appShuffleInfo.appId), + MDC.of(LogKeys.APP_ATTEMPT_ID, appShuffleInfo.attemptId), + MDC.of(LogKeys.SHUFFLE_ID, shuffleId), + MDC.of(LogKeys.SHUFFLE_MERGE_ID, shuffleMergeId), + MDC.of(LogKeys.DATA_FILE, dataFile.getAbsolutePath()), + MDC.of(LogKeys.INDEX_FILE, indexFile.getAbsolutePath()), + MDC.of(LogKeys.META_FILE, metaFile.getAbsolutePath())); throw new RuntimeException( String.format("Cannot initialize merged shuffle partition for appId %s shuffleId %s " + "shuffleMergeId %s reduceId %s", appShuffleInfo.appId, shuffleId, shuffleMergeId, @@ -411,8 +410,8 @@ private void removeOldApplicationAttemptsFromDb(AppShuffleInfo info) { @Override public void applicationRemoved(String appId, boolean cleanupLocalDirs) { logger.info("Application {} removed, cleanupLocalDirs = {}", - MDC.of(LogKeys.APP_ID$.MODULE$, appId), - MDC.of(LogKeys.CLEANUP_LOCAL_DIRS$.MODULE$, cleanupLocalDirs)); + MDC.of(LogKeys.APP_ID, appId), + MDC.of(LogKeys.CLEANUP_LOCAL_DIRS, cleanupLocalDirs)); // Cleanup the DB within critical section to gain the consistency between // DB and in-memory hashmap. AtomicReference ref = new AtomicReference<>(null); @@ -523,7 +522,7 @@ void removeAppAttemptPathInfoFromDB(String appId, int attemptId) { db.delete(key); } catch (Exception e) { logger.error("Failed to remove the application attempt {} local path in DB", e, - MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, appAttemptId)); + MDC.of(LogKeys.APP_ATTEMPT_ID, appAttemptId)); } } } @@ -593,10 +592,10 @@ void deleteMergedFiles( } } logger.info("Delete {} data files, {} index files, {} meta files for {}", - MDC.of(LogKeys.NUM_DATA_FILES$.MODULE$, dataFilesDeleteCnt), - MDC.of(LogKeys.NUM_INDEX_FILES$.MODULE$, indexFilesDeleteCnt), - MDC.of(LogKeys.NUM_META_FILES$.MODULE$, metaFilesDeleteCnt), - MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId)); + MDC.of(LogKeys.NUM_DATA_FILES, dataFilesDeleteCnt), + MDC.of(LogKeys.NUM_INDEX_FILES, indexFilesDeleteCnt), + MDC.of(LogKeys.NUM_META_FILES, metaFilesDeleteCnt), + MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID, appAttemptShuffleMergeId)); } /** @@ -609,7 +608,7 @@ void removeAppShufflePartitionInfoFromDB(AppAttemptShuffleMergeId appAttemptShuf db.delete(getDbAppAttemptShufflePartitionKey(appAttemptShuffleMergeId)); } catch (Exception e) { logger.error("Error deleting {} from application shuffle merged partition info in DB", e, - MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId)); + MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID, appAttemptShuffleMergeId)); } } } @@ -629,7 +628,7 @@ void deleteExecutorDirs(AppShuffleInfo appShuffleInfo) { } } catch (Exception e) { logger.error("Failed to delete directory: {}", e, - MDC.of(LogKeys.PATH$.MODULE$, localDir)); + MDC.of(LogKeys.PATH, localDir)); } } } @@ -759,10 +758,10 @@ public ByteBuffer getCompletionResponse() { @Override public MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) { logger.info("{} attempt {} shuffle {} shuffleMerge {}: finalize shuffle merge", - MDC.of(LogKeys.APP_ID$.MODULE$, msg.appId), - MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, msg.appAttemptId), - MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, msg.shuffleId), - MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, msg.shuffleMergeId)); + MDC.of(LogKeys.APP_ID, msg.appId), + MDC.of(LogKeys.APP_ATTEMPT_ID, msg.appAttemptId), + MDC.of(LogKeys.SHUFFLE_ID, 
msg.shuffleId), + MDC.of(LogKeys.SHUFFLE_MERGE_ID, msg.shuffleMergeId)); AppShuffleInfo appShuffleInfo = validateAndGetAppShuffleInfo(msg.appId); if (appShuffleInfo.attemptId != msg.appAttemptId) { // If finalizeShuffleMerge from a former application attempt, it is considered late, @@ -846,12 +845,12 @@ public MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) { } catch (IOException ioe) { logger.warn("{} attempt {} shuffle {} shuffleMerge {}: exception while " + "finalizing shuffle partition {}. Exception message: {}", - MDC.of(LogKeys.APP_ID$.MODULE$, msg.appId), - MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, msg.appAttemptId), - MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, msg.shuffleId), - MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, msg.shuffleMergeId), - MDC.of(LogKeys.REDUCE_ID$.MODULE$, partition.reduceId), - MDC.of(LogKeys.EXCEPTION$.MODULE$, ioe.getMessage())); + MDC.of(LogKeys.APP_ID, msg.appId), + MDC.of(LogKeys.APP_ATTEMPT_ID, msg.appAttemptId), + MDC.of(LogKeys.SHUFFLE_ID, msg.shuffleId), + MDC.of(LogKeys.SHUFFLE_MERGE_ID, msg.shuffleMergeId), + MDC.of(LogKeys.REDUCE_ID, partition.reduceId), + MDC.of(LogKeys.EXCEPTION, ioe.getMessage())); } finally { partition.cleanable.clean(); } @@ -863,10 +862,10 @@ public MergeStatuses finalizeShuffleMerge(FinalizeShuffleMerge msg) { appShuffleInfo.shuffles.get(msg.shuffleId).setReduceIds(Ints.toArray(reduceIds)); } logger.info("{} attempt {} shuffle {} shuffleMerge {}: finalization of shuffle merge completed", - MDC.of(LogKeys.APP_ID$.MODULE$, msg.appId), - MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, msg.appAttemptId), - MDC.of(LogKeys.SHUFFLE_ID$.MODULE$, msg.shuffleId), - MDC.of(LogKeys.SHUFFLE_MERGE_ID$.MODULE$, msg.shuffleMergeId)); + MDC.of(LogKeys.APP_ID, msg.appId), + MDC.of(LogKeys.APP_ATTEMPT_ID, msg.appAttemptId), + MDC.of(LogKeys.SHUFFLE_ID, msg.shuffleId), + MDC.of(LogKeys.SHUFFLE_MERGE_ID, msg.shuffleMergeId)); return mergeStatuses; } @@ -934,8 +933,8 @@ public void registerExecutor(String appId, ExecutorShuffleInfo executorInfo) { if (originalAppShuffleInfo.get() != null) { AppShuffleInfo appShuffleInfo = originalAppShuffleInfo.get(); logger.warn("Cleanup shuffle info and merged shuffle files for {}_{} as new " + - "application attempt registered", MDC.of(LogKeys.APP_ID$.MODULE$, appId), - MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, appShuffleInfo.attemptId)); + "application attempt registered", MDC.of(LogKeys.APP_ID, appId), + MDC.of(LogKeys.APP_ATTEMPT_ID, appShuffleInfo.attemptId)); // Clean up all the merge shuffle related information in the DB for the former attempt submitCleanupTask( () -> closeAndDeletePartitionsIfNeeded(appShuffleInfo, true) @@ -992,12 +991,12 @@ private void shutdownMergedShuffleCleanerNow() { List unfinishedTasks = mergedShuffleCleaner.shutdownNow(); logger.warn("There are still {} tasks not completed in mergedShuffleCleaner " + "after {} ms.", - MDC.of(LogKeys.COUNT$.MODULE$, unfinishedTasks.size()), - MDC.of(LogKeys.TIMEOUT$.MODULE$, cleanerShutdownTimeout * 1000L)); + MDC.of(LogKeys.COUNT, unfinishedTasks.size()), + MDC.of(LogKeys.TIMEOUT, cleanerShutdownTimeout * 1000L)); // Wait a while for tasks to respond to being cancelled if (!mergedShuffleCleaner.awaitTermination(cleanerShutdownTimeout, TimeUnit.SECONDS)) { logger.warn("mergedShuffleCleaner did not terminate in {} ms.", - MDC.of(LogKeys.TIMEOUT$.MODULE$, cleanerShutdownTimeout * 1000L)); + MDC.of(LogKeys.TIMEOUT, cleanerShutdownTimeout * 1000L)); } } catch (InterruptedException ignored) { Thread.currentThread().interrupt(); @@ -1017,7 +1016,7 @@ 
private void writeAppPathsInfoToDb(String appId, int attemptId, AppPathsInfo app db.put(key, value); } catch (Exception e) { logger.error("Error saving registered app paths info for {}", e, - MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, appAttemptId)); + MDC.of(LogKeys.APP_ATTEMPT_ID, appAttemptId)); } } } @@ -1035,7 +1034,7 @@ private void writeAppAttemptShuffleMergeInfoToDB( db.put(dbKey, new byte[0]); } catch (Exception e) { logger.error("Error saving active app shuffle partition {}", e, - MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId)); + MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID, appAttemptShuffleMergeId)); } } } @@ -1137,7 +1136,7 @@ List reloadActiveAppAttemptsPathInfo(DB db) throws IOException { dbKeysToBeRemoved.add(getDbAppAttemptPathsKey(existingAppAttemptId)); } catch (IOException e) { logger.error("Failed to get the DB key for {}", e, - MDC.of(LogKeys.APP_ATTEMPT_ID$.MODULE$, existingAppAttemptId)); + MDC.of(LogKeys.APP_ATTEMPT_ID, existingAppAttemptId)); } } return new AppShuffleInfo( @@ -1187,7 +1186,7 @@ List reloadFinalizedAppAttemptsShuffleMergeInfo(DB db) throws IOExceptio getDbAppAttemptShufflePartitionKey(appAttemptShuffleMergeId)); } catch (Exception e) { logger.error("Error getting the DB key for {}", e, MDC.of( - LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId)); + LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID, appAttemptShuffleMergeId)); } } return new AppShuffleMergePartitionsInfo(partitionId.shuffleMergeId, true); @@ -1216,7 +1215,7 @@ void removeOutdatedKeyValuesInDB(List dbKeysToBeRemoved) { db.delete(key); } catch (Exception e) { logger.error("Error deleting dangling key {} in DB", e, - MDC.of(LogKeys.KEY$.MODULE$, key)); + MDC.of(LogKeys.KEY, key)); } } ); @@ -1267,12 +1266,12 @@ private PushBlockStreamCallback( String streamId, AppShufflePartitionInfo partitionInfo, int mapIndex) { - Preconditions.checkArgument(mergeManager != null); + JavaUtils.checkArgument(mergeManager != null, "mergeManager is null"); this.mergeManager = mergeManager; - Preconditions.checkArgument(appShuffleInfo != null); + JavaUtils.checkArgument(appShuffleInfo != null, "appShuffleInfo is null"); this.appShuffleInfo = appShuffleInfo; this.streamId = streamId; - Preconditions.checkArgument(partitionInfo != null); + JavaUtils.checkArgument(partitionInfo != null, "partitionInfo is null"); this.partitionInfo = partitionInfo; this.mapIndex = mapIndex; abortIfNecessary(); @@ -1599,7 +1598,7 @@ public void onComplete(String streamId) throws IOException { public void onFailure(String streamId, Throwable throwable) throws IOException { if (ERROR_HANDLER.shouldLogError(throwable)) { logger.error("Encountered issue when merging {}", throwable, - MDC.of(LogKeys.STREAM_ID$.MODULE$, streamId)); + MDC.of(LogKeys.STREAM_ID, streamId)); } else { logger.debug("Encountered issue when merging {}", streamId, throwable); } @@ -1719,7 +1718,7 @@ public AppAttemptShuffleMergeId( @JsonProperty("attemptId") int attemptId, @JsonProperty("shuffleId") int shuffleId, @JsonProperty("shuffleMergeId") int shuffleMergeId) { - Preconditions.checkArgument(appId != null, "app id is null"); + JavaUtils.checkArgument(appId != null, "app id is null"); this.appId = appId; this.attemptId = attemptId; this.shuffleId = shuffleId; @@ -1860,8 +1859,8 @@ void updateChunkInfo(long chunkOffset, int mapIndex) throws IOException { indexMetaUpdateFailed = false; } catch (IOException ioe) { logger.warn("{} reduceId {} update to index/meta failed", - 
MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId), - MDC.of(LogKeys.REDUCE_ID$.MODULE$, reduceId)); + MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID, appAttemptShuffleMergeId), + MDC.of(LogKeys.REDUCE_ID, reduceId)); indexMetaUpdateFailed = true; // Any exception here is propagated to the caller and the caller can decide whether to // abort or not. @@ -1913,8 +1912,8 @@ private void finalizePartition() throws IOException { private void deleteAllFiles() { if (!dataFile.delete()) { logger.info("Error deleting data file for {} reduceId {}", - MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId), - MDC.of(LogKeys.REDUCE_ID$.MODULE$, reduceId)); + MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID, appAttemptShuffleMergeId), + MDC.of(LogKeys.REDUCE_ID, reduceId)); } metaFile.delete(); indexFile.delete(); @@ -1983,22 +1982,22 @@ private void closeAllFiles( } } catch (IOException ioe) { logger.warn("Error closing data channel for {} reduceId {}", - MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId), - MDC.of(LogKeys.REDUCE_ID$.MODULE$, reduceId)); + MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID, appAttemptShuffleMergeId), + MDC.of(LogKeys.REDUCE_ID, reduceId)); } try { metaFile.close(); } catch (IOException ioe) { logger.warn("Error closing meta file for {} reduceId {}", - MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId), - MDC.of(LogKeys.REDUCE_ID$.MODULE$, reduceId)); + MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID, appAttemptShuffleMergeId), + MDC.of(LogKeys.REDUCE_ID, reduceId)); } try { indexFile.close(); } catch (IOException ioe) { logger.warn("Error closing index file for {} reduceId {}", - MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID$.MODULE$, appAttemptShuffleMergeId), - MDC.of(LogKeys.REDUCE_ID$.MODULE$, reduceId)); + MDC.of(LogKeys.APP_ATTEMPT_SHUFFLE_MERGE_ID, appAttemptShuffleMergeId), + MDC.of(LogKeys.REDUCE_ID, reduceId)); } } } @@ -2043,9 +2042,9 @@ private AppPathsInfo( this.subDirsPerLocalDir = subDirsPerLocalDir; if (logger.isInfoEnabled()) { logger.info("Updated active local dirs {} and sub dirs {} for application {}", - MDC.of(LogKeys.PATHS$.MODULE$, Arrays.toString(activeLocalDirs)), - MDC.of(LogKeys.NUM_SUB_DIRS$.MODULE$, subDirsPerLocalDir), - MDC.of(LogKeys.APP_ID$.MODULE$, appId)); + MDC.of(LogKeys.PATHS, Arrays.toString(activeLocalDirs)), + MDC.of(LogKeys.NUM_SUB_DIRS, subDirsPerLocalDir), + MDC.of(LogKeys.APP_ID, appId)); } } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockTransferor.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockTransferor.java index 31c454f63a92e..1dae2d54120cb 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockTransferor.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/RetryingBlockTransferor.java @@ -25,8 +25,6 @@ import java.util.concurrent.TimeUnit; import com.google.common.annotations.VisibleForTesting; -import com.google.common.base.Preconditions; -import com.google.common.collect.Sets; import com.google.common.util.concurrent.Uninterruptibles; import org.apache.spark.internal.SparkLogger; @@ -35,6 +33,7 @@ import org.apache.spark.internal.MDC; import org.apache.spark.network.buffer.ManagedBuffer; import org.apache.spark.network.sasl.SaslTimeoutException; +import org.apache.spark.network.util.JavaUtils; import org.apache.spark.network.util.NettyUtils; import 
org.apache.spark.network.util.TransportConf; @@ -131,7 +130,7 @@ public RetryingBlockTransferor( this.listener = listener; this.maxRetries = conf.maxIORetries(); this.retryWaitTime = conf.ioRetryWaitTimeMs(); - this.outstandingBlocksIds = Sets.newLinkedHashSet(); + this.outstandingBlocksIds = new LinkedHashSet<>(); Collections.addAll(outstandingBlocksIds, blockIds); this.currentListener = new RetryingBlockTransferListener(); this.errorHandler = errorHandler; @@ -182,13 +181,13 @@ private void transferAllOutstanding() { } catch (Exception e) { if (numRetries > 0) { logger.error("Exception while beginning {} of {} outstanding blocks (after {} retries)", e, - MDC.of(LogKeys.TRANSFER_TYPE$.MODULE$, listener.getTransferType()), - MDC.of(LogKeys.NUM_BLOCKS$.MODULE$, blockIdsToTransfer.length), - MDC.of(LogKeys.NUM_RETRY$.MODULE$, numRetries)); + MDC.of(LogKeys.TRANSFER_TYPE, listener.getTransferType()), + MDC.of(LogKeys.NUM_BLOCKS, blockIdsToTransfer.length), + MDC.of(LogKeys.NUM_RETRY, numRetries)); } else { logger.error("Exception while beginning {} of {} outstanding blocks", e, - MDC.of(LogKeys.TRANSFER_TYPE$.MODULE$, listener.getTransferType()), - MDC.of(LogKeys.NUM_BLOCKS$.MODULE$, blockIdsToTransfer.length)); + MDC.of(LogKeys.TRANSFER_TYPE, listener.getTransferType()), + MDC.of(LogKeys.NUM_BLOCKS, blockIdsToTransfer.length)); } if (shouldRetry(e) && initiateRetry(e)) { // successfully initiated a retry @@ -216,11 +215,11 @@ synchronized boolean initiateRetry(Throwable e) { currentListener = new RetryingBlockTransferListener(); logger.info("Retrying {} ({}/{}) for {} outstanding blocks after {} ms", - MDC.of(LogKeys.TRANSFER_TYPE$.MODULE$, listener.getTransferType()), - MDC.of(LogKeys.NUM_RETRY$.MODULE$, retryCount), - MDC.of(LogKeys.MAX_ATTEMPTS$.MODULE$, maxRetries), - MDC.of(LogKeys.NUM_BLOCKS$.MODULE$, outstandingBlocksIds.size()), - MDC.of(LogKeys.RETRY_WAIT_TIME$.MODULE$, retryWaitTime)); + MDC.of(LogKeys.TRANSFER_TYPE, listener.getTransferType()), + MDC.of(LogKeys.NUM_RETRY, retryCount), + MDC.of(LogKeys.MAX_ATTEMPTS, maxRetries), + MDC.of(LogKeys.NUM_BLOCKS, outstandingBlocksIds.size()), + MDC.of(LogKeys.RETRY_WAIT_TIME, retryWaitTime)); try { executorService.execute(() -> { @@ -247,7 +246,7 @@ private synchronized boolean shouldRetry(Throwable e) { // If this is a non SASL request failure, reduce earlier SASL failures from retryCount // since some subsequent SASL attempt was successful if (!isSaslTimeout && saslRetryCount > 0) { - Preconditions.checkState(retryCount >= saslRetryCount, + JavaUtils.checkState(retryCount >= saslRetryCount, "retryCount must be greater than or equal to saslRetryCount"); retryCount -= saslRetryCount; saslRetryCount = 0; @@ -282,7 +281,7 @@ private void handleBlockTransferSuccess(String blockId, ManagedBuffer data) { // If there were SASL failures earlier, remove them from retryCount, as there was // a SASL success (and some other request post bootstrap was also successful). 
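A note on the structured-logging change running through the hunks above: the Java callers stop reaching into the Scala companion object (LogKeys.APP_ID$.MODULE$) and reference LogKeys.APP_ID directly, pairing each {} placeholder with an MDC.of(key, value) argument. The snippet below is a minimal illustration of that call shape, not code from this patch; the class and method names are made up, and it assumes the SparkLogger, SparkLoggerFactory, MDC, and LogKeys types already imported throughout these files.

import org.apache.spark.internal.LogKeys;
import org.apache.spark.internal.MDC;
import org.apache.spark.internal.SparkLogger;
import org.apache.spark.internal.SparkLoggerFactory;

final class MdcLoggingSketch {
  private static final SparkLogger logger =
      SparkLoggerFactory.getLogger(MdcLoggingSketch.class);

  static void logFinalized(String appId, int attemptId, int shuffleId, int shuffleMergeId) {
    // One MDC.of(key, value) per {} placeholder; the keys are referenced as
    // plain LogKeys constants, with no $.MODULE$ accessor.
    logger.info("{} attempt {} shuffle {} shuffleMerge {}: finalization completed",
        MDC.of(LogKeys.APP_ID, appId),
        MDC.of(LogKeys.APP_ATTEMPT_ID, attemptId),
        MDC.of(LogKeys.SHUFFLE_ID, shuffleId),
        MDC.of(LogKeys.SHUFFLE_MERGE_ID, shuffleMergeId));
  }
}

The same RetryingBlockTransferor hunks also make the purely mechanical swaps of Sets.newLinkedHashSet() for new LinkedHashSet<>() and Preconditions.checkState for JavaUtils.checkState, which need no further illustration.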
if (saslRetryCount > 0) { - Preconditions.checkState(retryCount >= saslRetryCount, + JavaUtils.checkState(retryCount >= saslRetryCount, "retryCount must be greater than or equal to saslRetryCount"); retryCount -= saslRetryCount; saslRetryCount = 0; @@ -311,9 +310,9 @@ private void handleBlockTransferFailure(String blockId, Throwable exception) { } else { if (errorHandler.shouldLogError(exception)) { logger.error("Failed to {} block {}, and will not retry ({} retries)", exception, - MDC.of(LogKeys.TRANSFER_TYPE$.MODULE$, listener.getTransferType()), - MDC.of(LogKeys.BLOCK_ID$.MODULE$, blockId), - MDC.of(LogKeys.NUM_RETRY$.MODULE$,retryCount)); + MDC.of(LogKeys.TRANSFER_TYPE, listener.getTransferType()), + MDC.of(LogKeys.BLOCK_ID, blockId), + MDC.of(LogKeys.NUM_RETRY,retryCount)); } else { logger.debug( String.format("Failed to %s block %s, and will not retry (%s retries)", diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/checksum/ShuffleChecksumHelper.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/checksum/ShuffleChecksumHelper.java index 62fcda701d948..2dbf38be954db 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/checksum/ShuffleChecksumHelper.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/checksum/ShuffleChecksumHelper.java @@ -21,8 +21,6 @@ import java.util.concurrent.TimeUnit; import java.util.zip.*; -import com.google.common.io.ByteStreams; - import org.apache.spark.internal.SparkLogger; import org.apache.spark.internal.SparkLoggerFactory; import org.apache.spark.internal.LogKeys; @@ -88,7 +86,7 @@ public static String getChecksumFileName(String blockName, String algorithm) { private static long readChecksumByReduceId(File checksumFile, int reduceId) throws IOException { try (DataInputStream in = new DataInputStream(new FileInputStream(checksumFile))) { - ByteStreams.skipFully(in, reduceId * 8L); + in.skipNBytes(reduceId * 8L); return in.readLong(); } } @@ -156,7 +154,7 @@ public static Cause diagnoseCorruption( } catch (FileNotFoundException e) { // Even if checksum is enabled, a checksum file may not exist if error throws during writing. 
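The readChecksumByReduceId change above replaces Guava's ByteStreams.skipFully with java.io.InputStream#skipNBytes (available since JDK 12), which likewise throws EOFException when the stream is shorter than requested, so the failure behaviour is unchanged. Below is a self-contained sketch of the same access pattern under the assumption, taken from the surrounding code, that the checksum file is a flat sequence of 8-byte longs, one per reduce partition; the class name is illustrative.

import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;

final class ChecksumFileSketch {
  // Reads the checksum stored for a given reduce id from a file laid out as
  // consecutive 8-byte longs.
  static long readChecksum(File checksumFile, int reduceId) throws IOException {
    try (DataInputStream in = new DataInputStream(new FileInputStream(checksumFile))) {
      in.skipNBytes(reduceId * 8L); // throws EOFException on a truncated file
      return in.readLong();
    }
  }
}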
logger.warn("Checksum file {} doesn't exit", - MDC.of(LogKeys.PATH$.MODULE$, checksumFile.getName())); + MDC.of(LogKeys.PATH, checksumFile.getName())); cause = Cause.UNKNOWN_ISSUE; } catch (Exception e) { logger.warn("Unable to diagnose shuffle block corruption", e); @@ -169,9 +167,9 @@ public static Cause diagnoseCorruption( checksumByReader, checksumByWriter, checksumByReCalculation); } else { logger.info("Shuffle corruption diagnosis took {} ms, checksum file {}, cause {}", - MDC.of(LogKeys.TIME$.MODULE$, duration), - MDC.of(LogKeys.PATH$.MODULE$, checksumFile.getAbsolutePath()), - MDC.of(LogKeys.REASON$.MODULE$, cause)); + MDC.of(LogKeys.TIME, duration), + MDC.of(LogKeys.PATH, checksumFile.getAbsolutePath()), + MDC.of(LogKeys.REASON, cause)); } return cause; } diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/AbstractFetchShuffleBlocks.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/AbstractFetchShuffleBlocks.java index 0fca27cf26dfa..2bc57cc52f2cd 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/AbstractFetchShuffleBlocks.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/AbstractFetchShuffleBlocks.java @@ -17,7 +17,8 @@ package org.apache.spark.network.shuffle.protocol; -import com.google.common.base.Objects; +import java.util.Objects; + import io.netty.buffer.ByteBuf; import org.apache.commons.lang3.builder.ToStringBuilder; @@ -43,12 +44,14 @@ protected AbstractFetchShuffleBlocks( this.shuffleId = shuffleId; } + // checkstyle.off: RegexpSinglelineJava public ToStringBuilder toStringHelper() { return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) .append("appId", appId) .append("execId", execId) .append("shuffleId", shuffleId); } + // checkstyle.on: RegexpSinglelineJava /** * Returns number of blocks in the request. 
@@ -61,7 +64,7 @@ public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; AbstractFetchShuffleBlocks that = (AbstractFetchShuffleBlocks) o; return shuffleId == that.shuffleId - && Objects.equal(appId, that.appId) && Objects.equal(execId, that.execId); + && Objects.equals(appId, that.appId) && Objects.equals(execId, that.execId); } @Override diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockPushReturnCode.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockPushReturnCode.java index 05347c671e002..f4149b6875b26 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockPushReturnCode.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockPushReturnCode.java @@ -19,10 +19,7 @@ import java.util.Objects; -import com.google.common.base.Preconditions; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.protocol.Encoders; import org.apache.spark.network.server.BlockPushNonFatalFailure; @@ -43,7 +40,7 @@ public class BlockPushReturnCode extends BlockTransferMessage { public final String failureBlockId; public BlockPushReturnCode(byte returnCode, String failureBlockId) { - Preconditions.checkNotNull(BlockPushNonFatalFailure.getReturnCode(returnCode)); + Objects.requireNonNull(BlockPushNonFatalFailure.getReturnCode(returnCode)); this.returnCode = returnCode; this.failureBlockId = failureBlockId; } @@ -60,10 +57,8 @@ public int hashCode() { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("returnCode", returnCode) - .append("failureBlockId", failureBlockId) - .toString(); + return "BlockPushReturnCode[returnCode=" + returnCode + + ",failureBlockId=" + failureBlockId + "]"; } @Override diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlocksRemoved.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlocksRemoved.java index 2a050ce40b84b..9942d68297595 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlocksRemoved.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlocksRemoved.java @@ -20,8 +20,6 @@ import java.util.Objects; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; // Needed by ScalaDoc. 
See SPARK-7726 import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type; @@ -44,9 +42,7 @@ public int hashCode() { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("numRemovedBlocks", numRemovedBlocks) - .toString(); + return "BlocksRemoved[numRemovedBlocks=" + numRemovedBlocks + "]"; } @Override diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/CorruptionCause.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/CorruptionCause.java index 5690eee53bd13..d9b9d4d8f36c2 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/CorruptionCause.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/CorruptionCause.java @@ -18,8 +18,6 @@ package org.apache.spark.network.shuffle.protocol; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.shuffle.checksum.Cause; @@ -38,9 +36,7 @@ protected Type type() { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("cause", cause) - .toString(); + return "CorruptionCause[cause=" + cause + "]"; } @Override diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/DiagnoseCorruption.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/DiagnoseCorruption.java index 620b5ad71cd75..e509f45a9f0e3 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/DiagnoseCorruption.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/DiagnoseCorruption.java @@ -18,8 +18,6 @@ package org.apache.spark.network.shuffle.protocol; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.protocol.Encoders; /** Request to get the cause of a corrupted block. 
Returns {@link CorruptionCause} */ @@ -56,15 +54,9 @@ protected Type type() { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("appId", appId) - .append("execId", execId) - .append("shuffleId", shuffleId) - .append("mapId", mapId) - .append("reduceId", reduceId) - .append("checksum", checksum) - .append("algorithm", algorithm) - .toString(); + return "DiagnoseCorruption[appId=" + appId + ",execId=" + execId + ",shuffleId=" + shuffleId + + ",mapId=" + mapId + ",reduceId=" + reduceId + ",checksum=" + checksum + + ",algorithm=" + algorithm + "]"; } @Override diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/ExecutorShuffleInfo.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/ExecutorShuffleInfo.java index 8a3ccdef2920b..c53ab911c30da 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/ExecutorShuffleInfo.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/ExecutorShuffleInfo.java @@ -23,8 +23,6 @@ import com.fasterxml.jackson.annotation.JsonCreator; import com.fasterxml.jackson.annotation.JsonProperty; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.protocol.Encodable; import org.apache.spark.network.protocol.Encoders; @@ -60,11 +58,8 @@ public int hashCode() { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("localDirs", Arrays.toString(localDirs)) - .append("subDirsPerLocalDir", subDirsPerLocalDir) - .append("shuffleManager", shuffleManager) - .toString(); + return "ExecutorShuffleInfo[localDirs=" + Arrays.toString(localDirs) + + ",subDirsPerLocalDir=" + subDirsPerLocalDir + ",shuffleManager=" + shuffleManager + "]"; } @Override diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlockChunks.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlockChunks.java index cf4cbcf1ed08e..a6e1ce374b07f 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlockChunks.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlockChunks.java @@ -60,11 +60,10 @@ public FetchShuffleBlockChunks( @Override public String toString() { - return toStringHelper() - .append("shuffleMergeId", shuffleMergeId) - .append("reduceIds", Arrays.toString(reduceIds)) - .append("chunkIds", Arrays.deepToString(chunkIds)) - .toString(); + return "FetchShuffleBlockChunks[appId=" + appId + ",execId=" + execId + + ",shuffleId=" + shuffleId + ",shuffleMergeId=" + shuffleMergeId + + ",reduceIds=" + Arrays.toString(reduceIds) + + ",chunkIds=" + Arrays.deepToString(chunkIds) + "]"; } @Override diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlocks.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlocks.java index 68550a2fba86e..686207767ca1e 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlocks.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FetchShuffleBlocks.java @@ -62,11 +62,9 @@ public FetchShuffleBlocks( @Override public 
String toString() { - return toStringHelper() - .append("mapIds", Arrays.toString(mapIds)) - .append("reduceIds", Arrays.deepToString(reduceIds)) - .append("batchFetchEnabled", batchFetchEnabled) - .toString(); + return "FetchShuffleBlocks[appId=" + appId + ",execId=" + execId + ",shuffleId=" + shuffleId + + ",mapIds=" + Arrays.toString(mapIds) + ",reduceIds=" + Arrays.deepToString(reduceIds) + + ",batchFetchEnabled=" + batchFetchEnabled + "]"; } @Override diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java index cd5e005348f42..61152f48a85ba 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/FinalizeShuffleMerge.java @@ -17,10 +17,9 @@ package org.apache.spark.network.shuffle.protocol; -import com.google.common.base.Objects; +import java.util.Objects; + import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.protocol.Encoders; @@ -54,23 +53,19 @@ protected BlockTransferMessage.Type type() { @Override public int hashCode() { - return Objects.hashCode(appId, appAttemptId, shuffleId, shuffleMergeId); + return Objects.hash(appId, appAttemptId, shuffleId, shuffleMergeId); } @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("appId", appId) - .append("attemptId", appAttemptId) - .append("shuffleId", shuffleId) - .append("shuffleMergeId", shuffleMergeId) - .toString(); + return "FinalizeShuffleMerge[appId=" + appId + ",attemptId=" + appAttemptId + + ",shuffleId=" + shuffleId + ",shuffleMergeId=" + shuffleMergeId + "]"; } @Override public boolean equals(Object other) { if (other instanceof FinalizeShuffleMerge o) { - return Objects.equal(appId, o.appId) + return Objects.equals(appId, o.appId) && appAttemptId == o.appAttemptId && shuffleId == o.shuffleId && shuffleMergeId == o.shuffleMergeId; diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/GetLocalDirsForExecutors.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/GetLocalDirsForExecutors.java index f118f0604d9e9..8bd106c94c283 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/GetLocalDirsForExecutors.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/GetLocalDirsForExecutors.java @@ -21,8 +21,6 @@ import java.util.Objects; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.protocol.Encoders; @@ -49,10 +47,7 @@ public int hashCode() { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("appId", appId) - .append("execIds", Arrays.toString(execIds)) - .toString(); + return "GetLocalDirsForExecutors[appId=" + appId + ",execIds=" + Arrays.toString(execIds) + "]"; } @Override diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/LocalDirsForExecutors.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/LocalDirsForExecutors.java index 
b65f351d3cf3e..060b565d420fd 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/LocalDirsForExecutors.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/LocalDirsForExecutors.java @@ -20,8 +20,6 @@ import java.util.*; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.protocol.Encoders; @@ -64,11 +62,9 @@ public int hashCode() { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("execIds", Arrays.toString(execIds)) - .append("numLocalDirsByExec", Arrays.toString(numLocalDirsByExec)) - .append("allLocalDirs", Arrays.toString(allLocalDirs)) - .toString(); + return "LocalDirsForExecutors[execIds=" + Arrays.toString(execIds) + + ",numLocalDirsByExec=" + Arrays.toString(numLocalDirsByExec) + + ",allLocalDirs=" + Arrays.toString(allLocalDirs) + "]"; } @Override diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java index 892c3a5e77958..d21449016972f 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/MergeStatuses.java @@ -19,10 +19,9 @@ import java.util.Arrays; -import com.google.common.base.Objects; +import java.util.Objects; + import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.roaringbitmap.RoaringBitmap; import org.apache.spark.network.protocol.Encoders; @@ -86,18 +85,15 @@ public int hashCode() { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("shuffleId", shuffleId) - .append("shuffleMergeId", shuffleMergeId) - .append("reduceId size", reduceIds.length) - .toString(); + return "MergeStatuses[shuffleId=" + shuffleId + ",shuffleMergeId=" + shuffleMergeId + + ",reduceId size=" + reduceIds.length + "]"; } @Override public boolean equals(Object other) { if (other instanceof MergeStatuses o) { - return Objects.equal(shuffleId, o.shuffleId) - && Objects.equal(shuffleMergeId, o.shuffleMergeId) + return Objects.equals(shuffleId, o.shuffleId) + && Objects.equals(shuffleMergeId, o.shuffleMergeId) && Arrays.equals(bitmaps, o.bitmaps) && Arrays.equals(reduceIds, o.reduceIds) && Arrays.equals(sizes, o.sizes); diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java index 49288eef5c5de..87b40eb1fc6ac 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/OpenBlocks.java @@ -21,8 +21,6 @@ import java.util.Objects; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.protocol.Encoders; @@ -51,11 +49,8 @@ public int hashCode() { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("appId", appId) - 
.append("execId", execId) - .append("blockIds", Arrays.toString(blockIds)) - .toString(); + return "OpenBlocks[appId=" + appId + ",execId=" + execId + ",blockIds=" + + Arrays.toString(blockIds) + "]"; } @Override diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java index ceab54a1c0615..20e6e79c31980 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/PushBlockStream.java @@ -17,11 +17,9 @@ package org.apache.spark.network.shuffle.protocol; -import com.google.common.base.Objects; -import io.netty.buffer.ByteBuf; +import java.util.Objects; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; +import io.netty.buffer.ByteBuf; import org.apache.spark.network.protocol.Encoders; @@ -68,27 +66,21 @@ protected Type type() { @Override public int hashCode() { - return Objects.hashCode(appId, appAttemptId, shuffleId, shuffleMergeId, mapIndex , reduceId, + return Objects.hash(appId, appAttemptId, shuffleId, shuffleMergeId, mapIndex , reduceId, index); } @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("appId", appId) - .append("attemptId", appAttemptId) - .append("shuffleId", shuffleId) - .append("shuffleMergeId", shuffleMergeId) - .append("mapIndex", mapIndex) - .append("reduceId", reduceId) - .append("index", index) - .toString(); + return "PushBlockStream[appId=" + appId + ",attemptId=" + appAttemptId + + ",shuffleId=" + shuffleId + ",shuffleMergeId=" + shuffleMergeId + ",mapIndex=" + mapIndex + + ",reduceId=" + reduceId + ",index=" + index + "]"; } @Override public boolean equals(Object other) { if (other instanceof PushBlockStream o) { - return Objects.equal(appId, o.appId) + return Objects.equals(appId, o.appId) && appAttemptId == o.appAttemptId && shuffleId == o.shuffleId && shuffleMergeId == o.shuffleMergeId diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java index 9805af67b9f26..a5931126e4ff8 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterExecutor.java @@ -20,8 +20,6 @@ import java.util.Objects; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.protocol.Encoders; @@ -56,11 +54,8 @@ public int hashCode() { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("appId", appId) - .append("execId", execId) - .append("executorInfo", executorInfo) - .toString(); + return "RegisterExecutor[appId=" + appId + ", execId=" + execId + + ",executorInfo=" + executorInfo + "]"; } @Override diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveBlocks.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveBlocks.java index 7032942331c3e..2743824b3d21c 100644 --- 
a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveBlocks.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveBlocks.java @@ -21,8 +21,6 @@ import java.util.Objects; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.protocol.Encoders; @@ -51,11 +49,8 @@ public int hashCode() { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("appId", appId) - .append("execId", execId) - .append("blockIds", Arrays.toString(blockIds)) - .toString(); + return "RemoveBlocks[appId=" + appId + ",execId=" + execId + + ",blockIds=" + Arrays.toString(blockIds) + "]"; } @Override diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveShuffleMerge.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveShuffleMerge.java index 8ce2e05e6097d..ac6d981b2e081 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveShuffleMerge.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RemoveShuffleMerge.java @@ -17,10 +17,9 @@ package org.apache.spark.network.shuffle.protocol; -import com.google.common.base.Objects; +import java.util.Objects; + import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.protocol.Encoders; @@ -54,23 +53,19 @@ protected Type type() { @Override public int hashCode() { - return Objects.hashCode(appId, appAttemptId, shuffleId, shuffleMergeId); + return Objects.hash(appId, appAttemptId, shuffleId, shuffleMergeId); } @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("appId", appId) - .append("attemptId", appAttemptId) - .append("shuffleId", shuffleId) - .append("shuffleMergeId", shuffleMergeId) - .toString(); + return "RemoveShuffleMerge[appId=" + appId + ",attemptId=" + appAttemptId + + ",shuffleId=" + shuffleId + ",shuffleMergeId=" + shuffleMergeId + "]"; } @Override public boolean equals(Object other) { if (other != null && other instanceof RemoveShuffleMerge o) { - return Objects.equal(appId, o.appId) + return Objects.equals(appId, o.appId) && appAttemptId == o.appAttemptId && shuffleId == o.shuffleId && shuffleMergeId == o.shuffleMergeId; diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java index aebd6f0d5a620..629e3c472e0a6 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/StreamHandle.java @@ -20,8 +20,6 @@ import java.util.Objects; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; // Needed by ScalaDoc. 
See SPARK-7726 import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type; @@ -49,10 +47,7 @@ public int hashCode() { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("streamId", streamId) - .append("numChunks", numChunks) - .toString(); + return "StreamHandle[streamId=" + streamId + ",numChunks=" + numChunks + "]"; } @Override diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java index fad187971e09a..9222134e6bb7f 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlock.java @@ -21,8 +21,6 @@ import java.util.Objects; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.protocol.Encoders; @@ -68,13 +66,8 @@ public int hashCode() { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("appId", appId) - .append("execId", execId) - .append("blockId", blockId) - .append("metadata size", metadata.length) - .append("block size", blockData.length) - .toString(); + return "UploadBlock[appId=" + appId + ",execId=" + execId + ",blockId=" + blockId + + ",metadata size=" + metadata.length + ",block size=" + blockData.length + "]"; } @Override diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlockStream.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlockStream.java index 95d0b3835562d..45c4c5f98de74 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlockStream.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/UploadBlockStream.java @@ -21,8 +21,6 @@ import java.util.Objects; import io.netty.buffer.ByteBuf; -import org.apache.commons.lang3.builder.ToStringBuilder; -import org.apache.commons.lang3.builder.ToStringStyle; import org.apache.spark.network.protocol.Encoders; @@ -55,10 +53,7 @@ public int hashCode() { @Override public String toString() { - return new ToStringBuilder(this, ToStringStyle.SHORT_PREFIX_STYLE) - .append("blockId", blockId) - .append("metadata size", metadata.length) - .toString(); + return "UploadBlockStream[blockId=" + blockId + ",metadata size=" + metadata.length + "]"; } @Override diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/CleanupNonShuffleServiceServedFilesSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/CleanupNonShuffleServiceServedFilesSuite.java index ccb464c2ce5bd..0c091d88f98b5 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/CleanupNonShuffleServiceServedFilesSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/CleanupNonShuffleServiceServedFilesSuite.java @@ -20,21 +20,17 @@ import java.io.File; import java.io.IOException; import java.nio.charset.StandardCharsets; -import java.nio.file.Files; -import java.nio.file.Path; import java.util.*; import java.util.concurrent.Executor; import java.util.concurrent.atomic.AtomicBoolean; import java.util.stream.Collectors; -import 
java.util.stream.Stream; -import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; import org.junit.jupiter.api.Test; import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertTrue; +import org.apache.spark.network.util.JavaUtils; import org.apache.spark.network.util.MapConfigProvider; import org.apache.spark.network.util.TransportConf; @@ -46,15 +42,15 @@ public class CleanupNonShuffleServiceServedFilesSuite { private static final String SORT_MANAGER = "org.apache.spark.shuffle.sort.SortShuffleManager"; private static Set expectedShuffleFilesToKeep = - ImmutableSet.of("shuffle_782_450_0.index", "shuffle_782_450_0.data"); + Set.of("shuffle_782_450_0.index", "shuffle_782_450_0.data"); private static Set expectedShuffleAndRddFilesToKeep = - ImmutableSet.of("shuffle_782_450_0.index", "shuffle_782_450_0.data", "rdd_12_34"); + Set.of("shuffle_782_450_0.index", "shuffle_782_450_0.data", "rdd_12_34"); private TransportConf getConf(boolean isFetchRddEnabled) { return new TransportConf( "shuffle", - new MapConfigProvider(ImmutableMap.of( + new MapConfigProvider(Map.of( Constants.SHUFFLE_SERVICE_FETCH_RDD_ENABLED, Boolean.toString(isFetchRddEnabled)))); } @@ -200,28 +196,13 @@ private static void assertStillThere(TestShuffleDataContext dataContext) { } } - private static Set collectFilenames(File[] files) throws IOException { - Set result = new HashSet<>(); - for (File file : files) { - if (file.exists()) { - try (Stream walk = Files.walk(file.toPath())) { - result.addAll(walk - .filter(Files::isRegularFile) - .map(x -> x.toFile().getName()) - .collect(Collectors.toSet())); - } - } - } - return result; - } - private static void assertContainedFilenames( TestShuffleDataContext dataContext, Set expectedFilenames) throws IOException { Set collectedFilenames = new HashSet<>(); for (String localDir : dataContext.localDirs) { - File[] dirs = new File[] { new File(localDir) }; - collectedFilenames.addAll(collectFilenames(dirs)); + JavaUtils.listFiles(new File(localDir)).stream().map(File::getName) + .collect(Collectors.toCollection(() -> collectedFilenames)); } assertEquals(expectedFilenames, collectedFilenames); } diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java index f7edc8837fde7..2a3135e3c8aeb 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalBlockHandlerSuite.java @@ -27,7 +27,6 @@ import com.codahale.metrics.Meter; import com.codahale.metrics.Metric; import com.codahale.metrics.Timer; -import com.google.common.io.ByteStreams; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.mockito.ArgumentCaptor; @@ -136,7 +135,7 @@ private void checkDiagnosisResult( CheckedInputStream checkedIn = new CheckedInputStream( blockMarkers[0].createInputStream(), checksum); byte[] buffer = new byte[10]; - ByteStreams.readFully(checkedIn, buffer, 0, (int) blockMarkers[0].size()); + JavaUtils.readFully(checkedIn, buffer, 0, (int) blockMarkers[0].size()); long checksumByWriter = checkedIn.getChecksum().getValue(); // when checksumByWriter == checksumRecalculated and checksumByReader != checksumByWriter diff --git 
a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java index 311827dbed4c5..488d02d63d552 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleBlockResolverSuite.java @@ -19,12 +19,11 @@ import java.io.IOException; import java.io.InputStream; -import java.io.InputStreamReader; import java.nio.charset.StandardCharsets; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.io.CharStreams; import org.apache.spark.network.shuffle.protocol.ExecutorShuffleInfo; +import org.apache.spark.network.util.JavaUtils; import org.apache.spark.network.util.MapConfigProvider; import org.apache.spark.network.util.TransportConf; import org.apache.spark.network.shuffle.ExternalShuffleBlockResolver.AppExecId; @@ -83,23 +82,17 @@ public void testSortShuffleBlocks() throws IOException { try (InputStream block0Stream = resolver.getBlockData( "app0", "exec0", 0, 0, 0).createInputStream()) { - String block0 = - CharStreams.toString(new InputStreamReader(block0Stream, StandardCharsets.UTF_8)); - assertEquals(sortBlock0, block0); + assertEquals(sortBlock0, JavaUtils.toString(block0Stream)); } try (InputStream block1Stream = resolver.getBlockData( "app0", "exec0", 0, 0, 1).createInputStream()) { - String block1 = - CharStreams.toString(new InputStreamReader(block1Stream, StandardCharsets.UTF_8)); - assertEquals(sortBlock1, block1); + assertEquals(sortBlock1, JavaUtils.toString(block1Stream)); } try (InputStream blocksStream = resolver.getContinuousBlocksData( "app0", "exec0", 0, 0, 0, 2).createInputStream()) { - String blocks = - CharStreams.toString(new InputStreamReader(blocksStream, StandardCharsets.UTF_8)); - assertEquals(sortBlock0 + sortBlock1, blocks); + assertEquals(sortBlock0 + sortBlock1, JavaUtils.toString(blocksStream)); } } diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java index ec71f83ba743c..59381cabe063a 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleIntegrationSuite.java @@ -32,7 +32,6 @@ import java.util.concurrent.Semaphore; import java.util.concurrent.TimeUnit; -import com.google.common.collect.Sets; import org.apache.spark.network.buffer.FileSegmentManagedBuffer; import org.apache.spark.network.server.OneForOneStreamManager; import org.junit.jupiter.api.AfterAll; @@ -222,7 +221,7 @@ public void testFetchOneSort() throws Exception { try (ExternalBlockStoreClient client = createExternalBlockStoreClient()) { registerExecutor(client, "exec-0", dataContext0.createExecutorInfo(SORT_MANAGER)); FetchResult exec0Fetch = fetchBlocks("exec-0", new String[] { "shuffle_0_0_0" }); - assertEquals(Sets.newHashSet("shuffle_0_0_0"), exec0Fetch.successBlocks); + assertEquals(Set.of("shuffle_0_0_0"), exec0Fetch.successBlocks); assertTrue(exec0Fetch.failedBlocks.isEmpty()); assertBufferListsEqual(exec0Fetch.buffers, Arrays.asList(exec0Blocks[0])); exec0Fetch.releaseBuffers(); @@ -235,7 +234,7 @@ public void testFetchThreeSort() throws 
Exception { registerExecutor(client,"exec-0", dataContext0.createExecutorInfo(SORT_MANAGER)); FetchResult exec0Fetch = fetchBlocks("exec-0", new String[]{"shuffle_0_0_0", "shuffle_0_0_1", "shuffle_0_0_2"}); - assertEquals(Sets.newHashSet("shuffle_0_0_0", "shuffle_0_0_1", "shuffle_0_0_2"), + assertEquals(Set.of("shuffle_0_0_0", "shuffle_0_0_1", "shuffle_0_0_2"), exec0Fetch.successBlocks); assertTrue(exec0Fetch.failedBlocks.isEmpty()); assertBufferListsEqual(exec0Fetch.buffers, Arrays.asList(exec0Blocks)); @@ -256,7 +255,7 @@ public void testFetchWrongBlockId() throws Exception { registerExecutor(client, "exec-1", dataContext0.createExecutorInfo(SORT_MANAGER)); FetchResult execFetch = fetchBlocks("exec-1", new String[]{"broadcast_1"}); assertTrue(execFetch.successBlocks.isEmpty()); - assertEquals(Sets.newHashSet("broadcast_1"), execFetch.failedBlocks); + assertEquals(Set.of("broadcast_1"), execFetch.failedBlocks); } } @@ -267,7 +266,7 @@ public void testFetchValidRddBlock() throws Exception { String validBlockId = "rdd_" + RDD_ID + "_" + SPLIT_INDEX_VALID_BLOCK; FetchResult execFetch = fetchBlocks("exec-1", new String[]{validBlockId}); assertTrue(execFetch.failedBlocks.isEmpty()); - assertEquals(Sets.newHashSet(validBlockId), execFetch.successBlocks); + assertEquals(Set.of(validBlockId), execFetch.successBlocks); assertBuffersEqual(new NioManagedBuffer(ByteBuffer.wrap(exec0RddBlockValid)), execFetch.buffers.get(0)); } @@ -280,7 +279,7 @@ public void testFetchDeletedRddBlock() throws Exception { String missingBlockId = "rdd_" + RDD_ID + "_" + SPLIT_INDEX_MISSING_FILE; FetchResult execFetch = fetchBlocks("exec-1", new String[]{missingBlockId}); assertTrue(execFetch.successBlocks.isEmpty()); - assertEquals(Sets.newHashSet(missingBlockId), execFetch.failedBlocks); + assertEquals(Set.of(missingBlockId), execFetch.failedBlocks); } } @@ -310,7 +309,7 @@ public void testFetchCorruptRddBlock() throws Exception { String corruptBlockId = "rdd_" + RDD_ID + "_" + SPLIT_INDEX_CORRUPT_LENGTH; FetchResult execFetch = fetchBlocks("exec-1", new String[]{corruptBlockId}); assertTrue(execFetch.successBlocks.isEmpty()); - assertEquals(Sets.newHashSet(corruptBlockId), execFetch.failedBlocks); + assertEquals(Set.of(corruptBlockId), execFetch.failedBlocks); } } @@ -321,7 +320,7 @@ public void testFetchNonexistent() throws Exception { FetchResult execFetch = fetchBlocks("exec-0", new String[]{"shuffle_2_0_0"}); assertTrue(execFetch.successBlocks.isEmpty()); - assertEquals(Sets.newHashSet("shuffle_2_0_0"), execFetch.failedBlocks); + assertEquals(Set.of("shuffle_2_0_0"), execFetch.failedBlocks); } } @@ -331,8 +330,8 @@ public void testFetchWrongExecutor() throws Exception { registerExecutor(client,"exec-0", dataContext0.createExecutorInfo(SORT_MANAGER)); FetchResult execFetch0 = fetchBlocks("exec-0", new String[]{"shuffle_0_0_0" /* right */}); FetchResult execFetch1 = fetchBlocks("exec-0", new String[]{"shuffle_1_0_0" /* wrong */}); - assertEquals(Sets.newHashSet("shuffle_0_0_0"), execFetch0.successBlocks); - assertEquals(Sets.newHashSet("shuffle_1_0_0"), execFetch1.failedBlocks); + assertEquals(Set.of("shuffle_0_0_0"), execFetch0.successBlocks); + assertEquals(Set.of("shuffle_1_0_0"), execFetch1.failedBlocks); } } @@ -343,7 +342,7 @@ public void testFetchUnregisteredExecutor() throws Exception { FetchResult execFetch = fetchBlocks("exec-2", new String[]{"shuffle_0_0_0", "shuffle_1_0_0"}); assertTrue(execFetch.successBlocks.isEmpty()); - assertEquals(Sets.newHashSet("shuffle_0_0_0", "shuffle_1_0_0"), 
execFetch.failedBlocks); + assertEquals(Set.of("shuffle_0_0_0", "shuffle_1_0_0"), execFetch.failedBlocks); } } @@ -355,7 +354,7 @@ public void testFetchNoServer() throws Exception { FetchResult execFetch = fetchBlocks("exec-0", new String[]{"shuffle_1_0_0", "shuffle_1_0_1"}, clientConf, 1 /* port */); assertTrue(execFetch.successBlocks.isEmpty()); - assertEquals(Sets.newHashSet("shuffle_1_0_0", "shuffle_1_0_1"), execFetch.failedBlocks); + assertEquals(Set.of("shuffle_1_0_0", "shuffle_1_0_1"), execFetch.failedBlocks); } } diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleSecuritySuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleSecuritySuite.java index 76f82800c502a..170b72b409e12 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleSecuritySuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ExternalShuffleSecuritySuite.java @@ -19,8 +19,8 @@ import java.io.IOException; import java.util.Arrays; +import java.util.Map; -import com.google.common.collect.ImmutableMap; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; @@ -46,7 +46,7 @@ public class ExternalShuffleSecuritySuite { protected TransportConf createTransportConf(boolean encrypt) { if (encrypt) { return new TransportConf("shuffle", new MapConfigProvider( - ImmutableMap.of("spark.authenticate.enableSaslEncryption", "true"))); + Map.of("spark.authenticate.enableSaslEncryption", "true"))); } else { return new TransportConf("shuffle", MapConfigProvider.EMPTY); } diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java index 7151d044105c7..f127568c8a333 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockFetcherSuite.java @@ -23,7 +23,6 @@ import java.util.LinkedHashMap; import java.util.concurrent.atomic.AtomicInteger; -import com.google.common.collect.Maps; import io.netty.buffer.Unpooled; import org.junit.jupiter.api.Test; @@ -57,7 +56,7 @@ public class OneForOneBlockFetcherSuite { @Test public void testFetchOne() { - LinkedHashMap blocks = Maps.newLinkedHashMap(); + LinkedHashMap blocks = new LinkedHashMap<>(); blocks.put("shuffle_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[0]))); String[] blockIds = blocks.keySet().toArray(new String[blocks.size()]); @@ -72,7 +71,7 @@ public void testFetchOne() { @Test public void testUseOldProtocol() { - LinkedHashMap blocks = Maps.newLinkedHashMap(); + LinkedHashMap blocks = new LinkedHashMap<>(); blocks.put("shuffle_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[0]))); String[] blockIds = blocks.keySet().toArray(new String[blocks.size()]); @@ -91,7 +90,7 @@ public void testUseOldProtocol() { @Test public void testFetchThreeShuffleBlocks() { - LinkedHashMap blocks = Maps.newLinkedHashMap(); + LinkedHashMap blocks = new LinkedHashMap<>(); blocks.put("shuffle_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12]))); blocks.put("shuffle_0_0_1", new NioManagedBuffer(ByteBuffer.wrap(new byte[23]))); blocks.put("shuffle_0_0_2", new NettyManagedBuffer(Unpooled.wrappedBuffer(new byte[23]))); @@ -112,7 +111,7 @@ public void 
testFetchThreeShuffleBlocks() { @Test public void testBatchFetchThreeShuffleBlocks() { - LinkedHashMap blocks = Maps.newLinkedHashMap(); + LinkedHashMap blocks = new LinkedHashMap<>(); blocks.put("shuffle_0_0_0_3", new NioManagedBuffer(ByteBuffer.wrap(new byte[58]))); String[] blockIds = blocks.keySet().toArray(new String[blocks.size()]); @@ -129,7 +128,7 @@ public void testBatchFetchThreeShuffleBlocks() { @Test public void testFetchThree() { - LinkedHashMap blocks = Maps.newLinkedHashMap(); + LinkedHashMap blocks = new LinkedHashMap<>(); blocks.put("b0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12]))); blocks.put("b1", new NioManagedBuffer(ByteBuffer.wrap(new byte[23]))); blocks.put("b2", new NettyManagedBuffer(Unpooled.wrappedBuffer(new byte[23]))); @@ -148,7 +147,7 @@ public void testFetchThree() { @Test public void testFailure() { - LinkedHashMap blocks = Maps.newLinkedHashMap(); + LinkedHashMap blocks = new LinkedHashMap<>(); blocks.put("b0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12]))); blocks.put("b1", null); blocks.put("b2", null); @@ -168,7 +167,7 @@ public void testFailure() { @Test public void testFailureAndSuccess() { - LinkedHashMap blocks = Maps.newLinkedHashMap(); + LinkedHashMap blocks = new LinkedHashMap<>(); blocks.put("b0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12]))); blocks.put("b1", null); blocks.put("b2", new NioManagedBuffer(ByteBuffer.wrap(new byte[21]))); @@ -190,14 +189,14 @@ public void testFailureAndSuccess() { @Test public void testEmptyBlockFetch() { IllegalArgumentException e = assertThrows(IllegalArgumentException.class, - () -> fetchBlocks(Maps.newLinkedHashMap(), new String[] {}, + () -> fetchBlocks(new LinkedHashMap<>(), new String[] {}, new OpenBlocks("app-id", "exec-id", new String[] {}), conf)); assertEquals("Zero-sized blockIds array", e.getMessage()); } @Test public void testFetchShuffleBlocksOrder() { - LinkedHashMap blocks = Maps.newLinkedHashMap(); + LinkedHashMap blocks = new LinkedHashMap<>(); blocks.put("shuffle_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[1]))); blocks.put("shuffle_0_2_1", new NioManagedBuffer(ByteBuffer.wrap(new byte[2]))); blocks.put("shuffle_0_10_2", new NettyManagedBuffer(Unpooled.wrappedBuffer(new byte[3]))); @@ -217,7 +216,7 @@ public void testFetchShuffleBlocksOrder() { @Test public void testBatchFetchShuffleBlocksOrder() { - LinkedHashMap blocks = Maps.newLinkedHashMap(); + LinkedHashMap blocks = new LinkedHashMap<>(); blocks.put("shuffle_0_0_1_2", new NioManagedBuffer(ByteBuffer.wrap(new byte[1]))); blocks.put("shuffle_0_2_2_3", new NioManagedBuffer(ByteBuffer.wrap(new byte[2]))); blocks.put("shuffle_0_10_3_4", new NettyManagedBuffer(Unpooled.wrappedBuffer(new byte[3]))); @@ -237,7 +236,7 @@ public void testBatchFetchShuffleBlocksOrder() { @Test public void testShuffleBlockChunksFetch() { - LinkedHashMap blocks = Maps.newLinkedHashMap(); + LinkedHashMap blocks = new LinkedHashMap<>(); blocks.put("shuffleChunk_0_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12]))); blocks.put("shuffleChunk_0_0_0_1", new NioManagedBuffer(ByteBuffer.wrap(new byte[23]))); blocks.put("shuffleChunk_0_0_0_2", @@ -255,7 +254,7 @@ public void testShuffleBlockChunksFetch() { @Test public void testShuffleBlockChunkFetchFailure() { - LinkedHashMap blocks = Maps.newLinkedHashMap(); + LinkedHashMap blocks = new LinkedHashMap<>(); blocks.put("shuffleChunk_0_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12]))); blocks.put("shuffleChunk_0_0_0_1", null); blocks.put("shuffleChunk_0_0_0_2", diff --git 
a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockPusherSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockPusherSuite.java index 32c6a8cd37eae..345ac7546af48 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockPusherSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/OneForOneBlockPusherSuite.java @@ -23,7 +23,6 @@ import java.util.LinkedHashMap; import java.util.Map; -import com.google.common.collect.Maps; import io.netty.buffer.Unpooled; import org.junit.jupiter.api.Test; @@ -47,7 +46,7 @@ public class OneForOneBlockPusherSuite { @Test public void testPushOne() { - LinkedHashMap blocks = Maps.newLinkedHashMap(); + LinkedHashMap blocks = new LinkedHashMap<>(); blocks.put("shufflePush_0_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[1]))); String[] blockIds = blocks.keySet().toArray(new String[blocks.size()]); @@ -61,7 +60,7 @@ public void testPushOne() { @Test public void testPushThree() { - LinkedHashMap blocks = Maps.newLinkedHashMap(); + LinkedHashMap blocks = new LinkedHashMap<>(); blocks.put("shufflePush_0_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12]))); blocks.put("shufflePush_0_0_1_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[23]))); blocks.put("shufflePush_0_0_2_0", @@ -82,7 +81,7 @@ public void testPushThree() { @Test public void testServerFailures() { - LinkedHashMap blocks = Maps.newLinkedHashMap(); + LinkedHashMap blocks = new LinkedHashMap<>(); blocks.put("shufflePush_0_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12]))); blocks.put("shufflePush_0_0_1_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[0]))); blocks.put("shufflePush_0_0_2_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[0]))); @@ -102,7 +101,7 @@ public void testServerFailures() { @Test public void testHandlingRetriableFailures() { - LinkedHashMap blocks = Maps.newLinkedHashMap(); + LinkedHashMap blocks = new LinkedHashMap<>(); blocks.put("shufflePush_0_0_0_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[12]))); blocks.put("shufflePush_0_0_1_0", null); blocks.put("shufflePush_0_0_2_0", new NioManagedBuffer(ByteBuffer.wrap(new byte[0]))); diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java index edd5e1961a501..b7e24fe3da8fe 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RemoteBlockPushResolverSuite.java @@ -39,9 +39,7 @@ import java.util.concurrent.TimeUnit; import com.fasterxml.jackson.databind.ObjectMapper; -import com.google.common.collect.ImmutableMap; -import org.apache.commons.io.FileUtils; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.Assertions; import org.junit.jupiter.api.BeforeEach; @@ -68,6 +66,7 @@ import org.apache.spark.network.shuffle.protocol.RemoveShuffleMerge; import org.apache.spark.network.util.MapConfigProvider; import org.apache.spark.network.util.TransportConf; +import org.apache.spark.network.util.JavaUtils; /** * Tests for {@link RemoteBlockPushResolver}. 
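The test hunks in this stretch swap Guava's ImmutableMap.of, ImmutableSet.of, and Sets.newHashSet for the JDK's Map.of and Set.of. One thing worth keeping in mind when reading the assertions: the JDK factories reject null keys, null values, and duplicate entries, which is fine for these fixed literals but makes them less forgiving than Sets.newHashSet for arbitrary data. A small illustrative sketch reusing the MapConfigProvider and TransportConf types that already appear in these suites; the class and method names are invented.

import java.util.Map;
import java.util.Set;

import org.apache.spark.network.util.MapConfigProvider;
import org.apache.spark.network.util.TransportConf;

final class TransportConfSketch {
  static TransportConf minChunkSizeConf() {
    // Map.of builds the same immutable single-entry map ImmutableMap.of did.
    return new TransportConf("shuffle", new MapConfigProvider(
        Map.of("spark.shuffle.push.server.minChunkSizeInMergedShuffleFile", "4")));
  }

  static Set<String> expectedBlocks() {
    // Set.of is immutable and duplicate-free, which is all these assertions need.
    return Set.of("shuffle_0_0_0", "shuffle_0_0_1");
  }
}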
@@ -97,7 +96,7 @@ public class RemoteBlockPushResolverSuite { public void before() throws IOException { localDirs = createLocalDirs(2); MapConfigProvider provider = new MapConfigProvider( - ImmutableMap.of("spark.shuffle.push.server.minChunkSizeInMergedShuffleFile", "4")); + Map.of("spark.shuffle.push.server.minChunkSizeInMergedShuffleFile", "4")); conf = new TransportConf("shuffle", provider); pushResolver = new RemoteBlockPushResolver(conf, null); registerExecutor(TEST_APP, prepareLocalDirs(localDirs, MERGE_DIRECTORY), MERGE_DIRECTORY_META); @@ -107,7 +106,7 @@ public void before() throws IOException { public void after() { try { for (Path local : localDirs) { - FileUtils.deleteDirectory(local.toFile()); + JavaUtils.deleteRecursively(local.toFile()); } removeApplication(TEST_APP); } catch (Exception e) { diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockTransferorSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockTransferorSuite.java index 84c8b1b3353f2..cbbade779ab68 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockTransferorSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/RetryingBlockTransferorSuite.java @@ -29,7 +29,6 @@ import java.util.concurrent.TimeoutException; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.Sets; import org.junit.jupiter.api.BeforeEach; import org.junit.jupiter.api.Test; import org.mockito.stubbing.Answer; @@ -353,15 +352,15 @@ public void testIOExceptionFailsConnectionEvenWithSaslException() new TimeoutException()); IOException ioException = new IOException(); List> interactions = Arrays.asList( - ImmutableMap.of("b0", saslExceptionInitial), - ImmutableMap.of("b0", ioException), - ImmutableMap.of("b0", saslExceptionInitial), - ImmutableMap.of("b0", ioException), - ImmutableMap.of("b0", saslExceptionFinal), + Map.of("b0", saslExceptionInitial), + Map.of("b0", ioException), + Map.of("b0", saslExceptionInitial), + Map.of("b0", ioException), + Map.of("b0", saslExceptionFinal), // will not get invoked because the connection fails - ImmutableMap.of("b0", ioException), + Map.of("b0", ioException), // will not get invoked - ImmutableMap.of("b0", block0) + Map.of("b0", block0) ); configMap.put("spark.shuffle.sasl.enableRetries", "true"); performInteractions(interactions, listener); @@ -425,7 +424,7 @@ private static void configureInteractions(List> in Stubber stub = null; // Contains all blockIds that are referenced across all interactions. 
- LinkedHashSet blockIds = Sets.newLinkedHashSet(); + LinkedHashSet blockIds = new LinkedHashSet<>(); for (Map interaction : interactions) { blockIds.addAll(interaction.keySet()); diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ShuffleTransportContextSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ShuffleTransportContextSuite.java index aef3bc51bcd4b..bd9884e81ba92 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ShuffleTransportContextSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/ShuffleTransportContextSuite.java @@ -18,12 +18,11 @@ package org.apache.spark.network.shuffle; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; -import com.google.common.collect.Lists; - import io.netty.buffer.ByteBuf; import io.netty.buffer.ByteBufAllocator; import io.netty.buffer.Unpooled; @@ -74,7 +73,7 @@ ShuffleTransportContext createShuffleTransportContext(boolean separateFinalizeTh } private ByteBuf getDecodableMessageBuf(Message req) throws Exception { - List out = Lists.newArrayList(); + List out = new ArrayList<>(); ChannelHandlerContext context = mock(ChannelHandlerContext.class); when(context.alloc()).thenReturn(ByteBufAllocator.DEFAULT); MessageEncoder.INSTANCE.encode(context, req, out); @@ -118,7 +117,7 @@ public void testDecodeOfFinalizeShuffleMessage() throws Exception { try (ShuffleTransportContext shuffleTransportContext = createShuffleTransportContext(true)) { ShuffleTransportContext.ShuffleMessageDecoder decoder = (ShuffleTransportContext.ShuffleMessageDecoder) shuffleTransportContext.getDecoder(); - List out = Lists.newArrayList(); + List out = new ArrayList<>(); decoder.decode(mock(ChannelHandlerContext.class), messageBuf, out); Assertions.assertEquals(1, out.size()); @@ -137,7 +136,7 @@ public void testDecodeOfAnyOtherRpcMessage() throws Exception { try (ShuffleTransportContext shuffleTransportContext = createShuffleTransportContext(true)) { ShuffleTransportContext.ShuffleMessageDecoder decoder = (ShuffleTransportContext.ShuffleMessageDecoder) shuffleTransportContext.getDecoder(); - List out = Lists.newArrayList(); + List out = new ArrayList<>(); decoder.decode(mock(ChannelHandlerContext.class), messageBuf, out); Assertions.assertEquals(1, out.size()); diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/SslExternalShuffleSecuritySuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/SslExternalShuffleSecuritySuite.java index 061d63dbcd72d..a04ec60ca1c1c 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/SslExternalShuffleSecuritySuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/SslExternalShuffleSecuritySuite.java @@ -17,7 +17,7 @@ package org.apache.spark.network.shuffle; -import com.google.common.collect.ImmutableMap; +import java.util.Map; import org.apache.spark.network.ssl.SslSampleConfigs; import org.apache.spark.network.util.TransportConf; @@ -30,9 +30,7 @@ protected TransportConf createTransportConf(boolean encrypt) { return new TransportConf( "shuffle", SslSampleConfigs.createDefaultConfigProviderForRpcNamespaceWithAdditionalEntries( - ImmutableMap.of( - "spark.authenticate.enableSaslEncryption", - "true") + Map.of("spark.authenticate.enableSaslEncryption", "true") ) ); } else { diff --git 
a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/SslShuffleTransportContextSuite.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/SslShuffleTransportContextSuite.java index 51463bbad5576..1a85838792d29 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/SslShuffleTransportContextSuite.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/SslShuffleTransportContextSuite.java @@ -17,7 +17,7 @@ package org.apache.spark.network.shuffle; -import com.google.common.collect.ImmutableMap; +import java.util.Map; import org.apache.spark.network.ssl.SslSampleConfigs; import org.apache.spark.network.util.TransportConf; @@ -29,7 +29,7 @@ protected TransportConf createTransportConf(boolean separateFinalizeThread) { return new TransportConf( "shuffle", SslSampleConfigs.createDefaultConfigProviderForRpcNamespaceWithAdditionalEntries( - ImmutableMap.of( + Map.of( "spark.shuffle.server.finalizeShuffleMergeThreadsPercent", separateFinalizeThread ? "1" : "0") ) diff --git a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java index 49b17824c3c72..4b8dc33c6bf52 100644 --- a/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java +++ b/common/network-shuffle/src/test/java/org/apache/spark/network/shuffle/TestShuffleDataContext.java @@ -22,6 +22,7 @@ import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; +import java.nio.file.Files; import com.google.common.io.Closeables; @@ -54,7 +55,7 @@ public void create() throws IOException { localDirs[i] = JavaUtils.createDirectory(root, "spark").getAbsolutePath(); for (int p = 0; p < subDirsPerLocalDir; p ++) { - new File(localDirs[i], String.format("%02x", p)).mkdirs(); + Files.createDirectories(new File(localDirs[i], String.format("%02x", p)).toPath()); } } } diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 78289684960ed..c4451923b17a5 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -48,7 +48,7 @@ org.apache.spark spark-tags_${scala.binary.version} - test + provided diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java index e455e531de0dd..2b9457c58560f 100644 --- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationAwareUTF8String.java @@ -1529,9 +1529,10 @@ public static UTF8String trimRight( } public static UTF8String[] splitSQL(final UTF8String input, final UTF8String delim, - final int limit, final int collationId) { + final int limit, final int collationId, boolean legacySplitTruncate) { if (CollationFactory.fetchCollation(collationId).isUtf8BinaryType) { - return input.split(delim, limit); + return legacySplitTruncate ? 
+ input.splitLegacyTruncate(delim, limit) : input.split(delim, limit); } else if (CollationFactory.fetchCollation(collationId).isUtf8LcaseType) { return lowercaseSplitSQL(input, delim, limit); } else { diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java index 4bcd75a731059..59c23064858d0 100644 --- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java +++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationFactory.java @@ -22,7 +22,6 @@ import java.util.concurrent.ConcurrentHashMap; import java.util.function.Function; import java.util.function.BiFunction; -import java.util.function.ToLongFunction; import java.util.stream.Stream; import com.ibm.icu.text.CollationKey; @@ -125,10 +124,19 @@ public static class Collation { public final String version; /** - * Collation sensitive hash function. Output for two UTF8Strings will be the same if they are - * equal according to the collation. + * Returns the sort key of the input UTF8String. Two UTF8String values are equal iff their + * sort keys are equal (compared as byte arrays). + * The sort key is defined as follows for collations without the RTRIM modifier: + * - UTF8_BINARY: It is the bytes of the string. + * - UTF8_LCASE: It is byte array we get by replacing all invalid UTF8 sequences with the + * Unicode replacement character and then converting all characters of the replaced string + * with their lowercase equivalents (the Greek capital and Greek small sigma both map to + * the Greek final sigma). + * - ICU collations: It is the byte array returned by the ICU library for the collated string. + * For strings with the RTRIM modifier, we right-trim the string and return the collation key + * of the resulting right-trimmed string. */ - public final ToLongFunction hashFunction; + public final Function sortKeyFunction; /** * Potentially faster way than using comparator to compare two UTF8Strings for equality. 
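Editor's note: the sort-key contract documented in the Javadoc above is easiest to see end to end with a small example. The sketch below is an illustration, not part of this patch; it assumes `sortKeyFunction` is exposed as a `Function<UTF8String, byte[]>` and reuses the existing `CollationFactory.fetchCollation(String)` lookup that the test suite later in this diff also calls.

    import java.util.Arrays;

    import org.apache.spark.sql.catalyst.util.CollationFactory;
    import org.apache.spark.unsafe.types.UTF8String;

    public class SortKeyExample {
      public static void main(String[] args) {
        CollationFactory.Collation lcase = CollationFactory.fetchCollation("UTF8_LCASE");
        // Per the Javadoc above: two strings are equal under a collation
        // iff their sort keys are byte-wise equal.
        byte[] k1 = lcase.sortKeyFunction.apply(UTF8String.fromString("aaa"));
        byte[] k2 = lcase.sortKeyFunction.apply(UTF8String.fromString("AAA"));
        System.out.println(Arrays.equals(k1, k2)); // UTF8_LCASE lowercases first, so: true
      }
    }

The same equality-versus-sort-key relationship is what the updated CollationFactorySuite assertion further down in this diff exercises via `sameElements`.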
@@ -182,7 +190,7 @@ public Collation( Collator collator, Comparator comparator, String version, - ToLongFunction hashFunction, + Function sortKeyFunction, BiFunction equalsFunction, boolean isUtf8BinaryType, boolean isUtf8LcaseType, @@ -192,7 +200,7 @@ public Collation( this.collator = collator; this.comparator = comparator; this.version = version; - this.hashFunction = hashFunction; + this.sortKeyFunction = sortKeyFunction; this.isUtf8BinaryType = isUtf8BinaryType; this.isUtf8LcaseType = isUtf8LcaseType; this.equalsFunction = equalsFunction; @@ -581,18 +589,18 @@ private static boolean isValidCollationId(int collationId) { protected Collation buildCollation() { if (caseSensitivity == CaseSensitivity.UNSPECIFIED) { Comparator comparator; - ToLongFunction hashFunction; + Function sortKeyFunction; BiFunction equalsFunction; boolean supportsSpaceTrimming = spaceTrimming != SpaceTrimming.NONE; if (spaceTrimming == SpaceTrimming.NONE) { comparator = UTF8String::binaryCompare; - hashFunction = s -> (long) s.hashCode(); + sortKeyFunction = s -> s.getBytes(); equalsFunction = UTF8String::equals; } else { comparator = (s1, s2) -> applyTrimmingPolicy(s1, spaceTrimming).binaryCompare( applyTrimmingPolicy(s2, spaceTrimming)); - hashFunction = s -> (long) applyTrimmingPolicy(s, spaceTrimming).hashCode(); + sortKeyFunction = s -> applyTrimmingPolicy(s, spaceTrimming).getBytes(); equalsFunction = (s1, s2) -> applyTrimmingPolicy(s1, spaceTrimming).equals( applyTrimmingPolicy(s2, spaceTrimming)); } @@ -603,25 +611,25 @@ protected Collation buildCollation() { null, comparator, CollationSpecICU.ICU_VERSION, - hashFunction, + sortKeyFunction, equalsFunction, /* isUtf8BinaryType = */ true, /* isUtf8LcaseType = */ false, spaceTrimming != SpaceTrimming.NONE); } else { Comparator comparator; - ToLongFunction hashFunction; + Function sortKeyFunction; if (spaceTrimming == SpaceTrimming.NONE) { comparator = CollationAwareUTF8String::compareLowerCase; - hashFunction = s -> - (long) CollationAwareUTF8String.lowerCaseCodePoints(s).hashCode(); + sortKeyFunction = s -> + CollationAwareUTF8String.lowerCaseCodePoints(s).getBytes(); } else { comparator = (s1, s2) -> CollationAwareUTF8String.compareLowerCase( applyTrimmingPolicy(s1, spaceTrimming), applyTrimmingPolicy(s2, spaceTrimming)); - hashFunction = s -> (long) CollationAwareUTF8String.lowerCaseCodePoints( - applyTrimmingPolicy(s, spaceTrimming)).hashCode(); + sortKeyFunction = s -> CollationAwareUTF8String.lowerCaseCodePoints( + applyTrimmingPolicy(s, spaceTrimming)).getBytes(); } return new Collation( @@ -630,7 +638,7 @@ protected Collation buildCollation() { null, comparator, CollationSpecICU.ICU_VERSION, - hashFunction, + sortKeyFunction, (s1, s2) -> comparator.compare(s1, s2) == 0, /* isUtf8BinaryType = */ false, /* isUtf8LcaseType = */ true, @@ -1013,19 +1021,18 @@ protected Collation buildCollation() { collator.freeze(); Comparator comparator; - ToLongFunction hashFunction; + Function sortKeyFunction; if (spaceTrimming == SpaceTrimming.NONE) { - hashFunction = s -> (long) collator.getCollationKey( - s.toValidString()).hashCode(); comparator = (s1, s2) -> collator.compare(s1.toValidString(), s2.toValidString()); + sortKeyFunction = s -> collator.getCollationKey(s.toValidString()).toByteArray(); } else { comparator = (s1, s2) -> collator.compare( applyTrimmingPolicy(s1, spaceTrimming).toValidString(), applyTrimmingPolicy(s2, spaceTrimming).toValidString()); - hashFunction = s -> (long) collator.getCollationKey( - applyTrimmingPolicy(s, 
spaceTrimming).toValidString()).hashCode(); + sortKeyFunction = s -> collator.getCollationKey( + applyTrimmingPolicy(s, spaceTrimming).toValidString()).toByteArray(); } return new Collation( @@ -1034,7 +1041,7 @@ protected Collation buildCollation() { collator, comparator, ICU_VERSION, - hashFunction, + sortKeyFunction, (s1, s2) -> comparator.compare(s1, s2) == 0, /* isUtf8BinaryType = */ false, /* isUtf8LcaseType = */ false, diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java index 135250e482b16..f950fd864c576 100644 --- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java +++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/CollationSupport.java @@ -706,8 +706,10 @@ public static int collationAwareRegexFlags(final int collationId) { public static UTF8String lowercaseRegex(final UTF8String regex) { return UTF8String.concat(lowercaseRegexPrefix, regex); } - public static UTF8String collationAwareRegex(final UTF8String regex, final int collationId) { - return supportsLowercaseRegex(collationId) ? lowercaseRegex(regex) : regex; + public static UTF8String collationAwareRegex( + final UTF8String regex, final int collationId, boolean notIgnoreEmpty) { + return supportsLowercaseRegex(collationId) && (notIgnoreEmpty || regex.numBytes() != 0) + ? lowercaseRegex(regex) : regex; } /** diff --git a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/DateTimeConstants.java b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/DateTimeConstants.java index 0ae238564d591..d52207ad860cd 100644 --- a/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/DateTimeConstants.java +++ b/common/unsafe/src/main/java/org/apache/spark/sql/catalyst/util/DateTimeConstants.java @@ -45,4 +45,5 @@ public class DateTimeConstants { public static final long NANOS_PER_MICROS = 1000L; public static final long NANOS_PER_MILLIS = MICROS_PER_MILLIS * NANOS_PER_MICROS; public static final long NANOS_PER_SECOND = MILLIS_PER_SECOND * NANOS_PER_MILLIS; + public static final long NANOS_PER_DAY = MICROS_PER_DAY * NANOS_PER_MICROS; } diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java index f12408fb49313..310dbce9eaab6 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/ByteArray.java @@ -20,9 +20,8 @@ import java.nio.ByteOrder; import java.util.Arrays; -import com.google.common.primitives.Ints; - import org.apache.spark.unsafe.Platform; +import org.apache.spark.network.util.JavaUtils; public final class ByteArray { @@ -169,7 +168,7 @@ public static byte[] concatWS(byte[] delimiter, byte[]... 
inputs) { } if (totalLength > 0) totalLength -= delimiter.length; // Allocate a new byte array, and copy the inputs one by one into it - final byte[] result = new byte[Ints.checkedCast(totalLength)]; + final byte[] result = new byte[JavaUtils.checkedCast(totalLength)]; int offset = 0; for (int i = 0; i < inputs.length; i++) { byte[] input = inputs[i]; diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeographyVal.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeographyVal.java new file mode 100644 index 0000000000000..48dc6f896e91a --- /dev/null +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeographyVal.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.unsafe.types; + +import java.io.Serializable; + +// This class represents the physical type for the GEOGRAPHY data type. +public final class GeographyVal implements Comparable, Serializable { + + // The GEOGRAPHY type is implemented as a byte array. We provide `getBytes` and `fromBytes` + // methods for readers and writers to access this underlying array of bytes. + private final byte[] value; + + // We make the constructor private. We should use `fromBytes` to create new instances. + private GeographyVal(byte[] value) { + this.value = value; + } + + public byte[] getBytes() { + return value; + } + + public static GeographyVal fromBytes(byte[] bytes) { + if (bytes == null) { + return null; + } else { + return new GeographyVal(bytes); + } + } + + // Comparison is not yet supported for GEOGRAPHY. + public int compareTo(GeographyVal g) { + throw new UnsupportedOperationException(); + } +} diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeometryVal.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeometryVal.java new file mode 100644 index 0000000000000..2bb7f194c940d --- /dev/null +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/GeometryVal.java @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.unsafe.types; + +import java.io.Serializable; + +// This class represents the physical type for the GEOMETRY data type. +public final class GeometryVal implements Comparable, Serializable { + + // The GEOMETRY type is implemented as a byte array. We provide `getBytes` and `fromBytes` + // methods for readers and writers to access this underlying array of bytes. + private final byte[] value; + + // We make the constructor private. We should use `fromBytes` to create new instances. + private GeometryVal(byte[] value) { + this.value = value; + } + + public byte[] getBytes() { + return value; + } + + public static GeometryVal fromBytes(byte[] bytes) { + if (bytes == null) { + return null; + } else { + return new GeometryVal(bytes); + } + } + + // Comparison is not yet supported for GEOMETRY. + public int compareTo(GeometryVal g) { + throw new UnsupportedOperationException(); + } +} diff --git a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java index caf8461b0b5d6..87d004040c3a0 100644 --- a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java +++ b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/UTF8String.java @@ -642,9 +642,13 @@ public UTF8String substring(final int start, final int until) { } int j = i; - while (i < numBytes && c < until) { - i += numBytesForFirstByte(getByte(i)); - c += 1; + if (until == Integer.MAX_VALUE) { + i = numBytes; + } else { + while (i < numBytes && c < until) { + i += numBytesForFirstByte(getByte(i)); + c += 1; + } } if (i > j) { @@ -663,9 +667,8 @@ public UTF8String substringSQL(int pos, int length) { // refers to element i-1 in the sequence. If a start index i is less than 0, it refers // to the -ith element before the end of the sequence. If a start index i is 0, it // refers to the first element. - int len = numChars(); // `len + pos` does not overflow as `len >= 0`. - int start = (pos > 0) ? pos -1 : ((pos < 0) ? len + pos : 0); + int start = (pos > 0) ? pos -1 : ((pos < 0) ? numChars() + pos : 0); int end; if ((long) start + length > Integer.MAX_VALUE) { @@ -1168,10 +1171,21 @@ public UTF8String reverse() { } public UTF8String repeat(int times) { - if (times <= 0) { + if (times <= 0 || numBytes == 0) { return EMPTY_UTF8; } + if (times == 1) { + return this; + } + + if (numBytes == 1) { + byte[] newBytes = new byte[times]; + byte b = getByte(0); + Arrays.fill(newBytes, b); + return fromBytes(newBytes); + } + byte[] newBytes = new byte[Math.multiplyExact(numBytes, times)]; copyMemory(this.base, this.offset, newBytes, BYTE_ARRAY_OFFSET, numBytes); @@ -1483,6 +1497,25 @@ public static UTF8String concatWs(UTF8String separator, UTF8String... inputs) { } public UTF8String[] split(UTF8String pattern, int limit) { + // For the empty `pattern` a `split` function ignores trailing empty strings unless original + // string is empty. + if (numBytes() != 0 && pattern.numBytes() == 0) { + int newLimit = limit > numChars() || limit <= 0 ? 
numChars() : limit; + byte[] input = getBytes(); + int byteIndex = 0; + UTF8String[] result = new UTF8String[newLimit]; + for (int charIndex = 0; charIndex < newLimit - 1; charIndex++) { + int currCharNumBytes = numBytesForFirstByte(input[byteIndex]); + result[charIndex] = UTF8String.fromBytes(input, byteIndex, currCharNumBytes); + byteIndex += currCharNumBytes; + } + result[newLimit - 1] = UTF8String.fromBytes(input, byteIndex, numBytes() - byteIndex); + return result; + } + return split(pattern.toString(), limit); + } + + public UTF8String[] splitLegacyTruncate(UTF8String pattern, int limit) { // For the empty `pattern` a `split` function ignores trailing empty strings unless original // string is empty. if (numBytes() != 0 && pattern.numBytes() == 0) { diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeographyValSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeographyValSuite.java new file mode 100644 index 0000000000000..639a8b2f77821 --- /dev/null +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeographyValSuite.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.unsafe.types; + +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; + +public class GeographyValSuite { + + @Test + public void roundTripBytes() { + // A simple byte array to test the round trip (`fromBytes` -> `getBytes`). + byte[] bytes = new byte[] { 1, 2, 3, 4, 5, 6 }; + GeographyVal geographyVal = GeographyVal.fromBytes(bytes); + assertNotNull(geographyVal); + assertArrayEquals(bytes, geographyVal.getBytes()); + } + + @Test + public void roundNullHandling() { + // A simple null byte array to test null handling for GEOGRAPHY. + byte[] bytes = null; + GeographyVal geographyVal = GeographyVal.fromBytes(bytes); + assertNull(geographyVal); + } + + @Test + public void testCompareTo() { + // Comparison is not yet supported for GEOGRAPHY. 
+ byte[] bytes1 = new byte[] { 1, 2, 3 }; + byte[] bytes2 = new byte[] { 4, 5, 6 }; + GeographyVal geographyVal1 = GeographyVal.fromBytes(bytes1); + GeographyVal geographyVal2 = GeographyVal.fromBytes(bytes2); + try { + geographyVal1.compareTo(geographyVal2); + } catch (UnsupportedOperationException e) { + assert(e.toString().equals("java.lang.UnsupportedOperationException")); + } + } +} diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeometryValSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeometryValSuite.java new file mode 100644 index 0000000000000..e38c6903e6ddc --- /dev/null +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/GeometryValSuite.java @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.unsafe.types; + +import org.junit.jupiter.api.Test; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; + +public class GeometryValSuite { + + @Test + public void roundTripBytes() { + // A simple byte array to test the round trip (`fromBytes` -> `getBytes`). + byte[] bytes = new byte[] { 1, 2, 3, 4, 5, 6 }; + GeometryVal geometryVal = GeometryVal.fromBytes(bytes); + assertNotNull(geometryVal); + assertArrayEquals(bytes, geometryVal.getBytes()); + } + + @Test + public void roundNullHandling() { + // A simple null byte array to test null handling for GEOMETRY. + byte[] bytes = null; + GeometryVal geometryVal = GeometryVal.fromBytes(bytes); + assertNull(geometryVal); + } + + @Test + public void testCompareTo() { + // Comparison is not yet supported for GEOMETRY. 
+ byte[] bytes1 = new byte[] { 1, 2, 3 }; + byte[] bytes2 = new byte[] { 4, 5, 6 }; + GeometryVal geometryVal1 = GeometryVal.fromBytes(bytes1); + GeometryVal geometryVal2 = GeometryVal.fromBytes(bytes2); + try { + geometryVal1.compareTo(geometryVal2); + } catch (UnsupportedOperationException e) { + assert(e.toString().equals("java.lang.UnsupportedOperationException")); + } + } +} diff --git a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java index c4a66fdffdd4d..26b96155377e8 100644 --- a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java +++ b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/UTF8StringSuite.java @@ -24,7 +24,6 @@ import java.nio.charset.StandardCharsets; import java.util.*; -import com.google.common.collect.ImmutableMap; import org.apache.spark.unsafe.Platform; import org.apache.spark.unsafe.UTF8StringBuilder; @@ -432,7 +431,7 @@ public void split() { new UTF8String[]{fromString("a"), fromString("b")}, fromString("ab").split(fromString(""), 100)); assertArrayEquals( - new UTF8String[]{fromString("a")}, + new UTF8String[]{fromString("ab")}, fromString("ab").split(fromString(""), 1)); assertArrayEquals( new UTF8String[]{fromString("")}, @@ -495,7 +494,7 @@ public void levenshteinDistance() { public void translate() { assertEquals( fromString("1a2s3ae"), - fromString("translate").translate(ImmutableMap.of( + fromString("translate").translate(Map.of( "r", "1", "n", "2", "l", "3", @@ -506,7 +505,7 @@ public void translate() { fromString("translate").translate(new HashMap<>())); assertEquals( fromString("asae"), - fromString("translate").translate(ImmutableMap.of( + fromString("translate").translate(Map.of( "r", "\0", "n", "\0", "l", "\0", @@ -514,7 +513,7 @@ public void translate() { ))); assertEquals( fromString("aa世b"), - fromString("花花世界").translate(ImmutableMap.of( + fromString("花花世界").translate(Map.of( "花", "a", "界", "b" ))); diff --git a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala index 8e9d33efe7a6d..ddf588b6c64c7 100644 --- a/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala +++ b/common/unsafe/src/test/scala/org/apache/spark/unsafe/types/CollationFactorySuite.scala @@ -17,7 +17,8 @@ package org.apache.spark.unsafe.types -import scala.collection.parallel.immutable.ParSeq +import java.util.stream.IntStream + import scala.jdk.CollectionConverters.MapHasAsScala import com.ibm.icu.util.ULocale @@ -139,7 +140,7 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig case class CollationTestCase[R](collationName: String, s1: String, s2: String, expectedResult: R) - test("collation aware equality and hash") { + test("collation aware equality and sort key") { val checks = Seq( CollationTestCase("UTF8_BINARY", "aaa", "aaa", true), CollationTestCase("UTF8_BINARY", "aaa", "AAA", false), @@ -194,9 +195,9 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig assert(collation.equalsFunction(toUTF8(testCase.s1), toUTF8(testCase.s2)) == testCase.expectedResult) - val hash1 = collation.hashFunction.applyAsLong(toUTF8(testCase.s1)) - val hash2 = collation.hashFunction.applyAsLong(toUTF8(testCase.s2)) - assert((hash1 == hash2) == testCase.expectedResult) + val sortKey1 = 
collation.sortKeyFunction.apply(toUTF8(testCase.s1)).asInstanceOf[Array[Byte]] + val sortKey2 = collation.sortKeyFunction.apply(toUTF8(testCase.s2)).asInstanceOf[Array[Byte]] + assert(sortKey1.sameElements(sortKey2) == testCase.expectedResult) }) } @@ -293,7 +294,7 @@ class CollationFactorySuite extends AnyFunSuite with Matchers { // scalastyle:ig (0 to 10).foreach(_ => { val collator = fetchCollation("UNICODE").getCollator - ParSeq(0 to 100).foreach { _ => + IntStream.rangeClosed(0, 100).parallel().forEach { _ => collator.getCollationKey("aaa") } }) diff --git a/common/utils-java/pom.xml b/common/utils-java/pom.xml new file mode 100644 index 0000000000000..ba3603f810856 --- /dev/null +++ b/common/utils-java/pom.xml @@ -0,0 +1,84 @@ + + + + + 4.0.0 + + org.apache.spark + spark-parent_2.13 + 4.1.0-SNAPSHOT + ../../pom.xml + + + spark-common-utils-java_2.13 + jar + Spark Project Common Java Utils + https://spark.apache.org/ + + common-utils-java + + + + + org.apache.spark + spark-tags_${scala.binary.version} + + + com.fasterxml.jackson.core + jackson-databind + + + org.slf4j + slf4j-api + + + + org.slf4j + jul-to-slf4j + + + org.slf4j + jcl-over-slf4j + + + org.apache.logging.log4j + log4j-slf4j2-impl + + + org.apache.logging.log4j + log4j-api + + + org.apache.logging.log4j + log4j-core + + + org.apache.logging.log4j + log4j-1.2-api + + + org.apache.logging.log4j + log4j-layout-template-json + + + + target/scala-${scala.binary.version}/classes + target/scala-${scala.binary.version}/test-classes + + diff --git a/common/utils/src/main/java/org/apache/spark/QueryContext.java b/common/utils-java/src/main/java/org/apache/spark/QueryContext.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/QueryContext.java rename to common/utils-java/src/main/java/org/apache/spark/QueryContext.java diff --git a/common/utils/src/main/java/org/apache/spark/QueryContextType.java b/common/utils-java/src/main/java/org/apache/spark/QueryContextType.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/QueryContextType.java rename to common/utils-java/src/main/java/org/apache/spark/QueryContextType.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/CoGroupFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/CoGroupFunction.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/CoGroupFunction.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/CoGroupFunction.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/DoubleFlatMapFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/DoubleFlatMapFunction.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/DoubleFlatMapFunction.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/DoubleFlatMapFunction.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/DoubleFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/DoubleFunction.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/DoubleFunction.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/DoubleFunction.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/FilterFunction.java 
b/common/utils-java/src/main/java/org/apache/spark/api/java/function/FilterFunction.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/FilterFunction.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/FilterFunction.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/FlatMapFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/FlatMapFunction.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/FlatMapFunction.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/FlatMapFunction.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/FlatMapFunction2.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/FlatMapFunction2.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/FlatMapFunction2.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/FlatMapFunction2.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/FlatMapGroupsFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/FlatMapGroupsFunction.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/FlatMapGroupsFunction.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/FlatMapGroupsFunction.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/ForeachFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/ForeachFunction.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/ForeachFunction.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/ForeachFunction.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/ForeachPartitionFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/ForeachPartitionFunction.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/ForeachPartitionFunction.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/ForeachPartitionFunction.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/Function.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/Function.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/Function.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/Function.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/Function0.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/Function0.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/Function0.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/Function0.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/Function2.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/Function2.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/Function2.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/Function2.java diff --git 
a/common/utils/src/main/java/org/apache/spark/api/java/function/Function3.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/Function3.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/Function3.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/Function3.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/Function4.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/Function4.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/Function4.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/Function4.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/MapFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/MapFunction.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/MapFunction.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/MapFunction.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/MapGroupsFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/MapGroupsFunction.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/MapGroupsFunction.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/MapGroupsFunction.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/MapPartitionsFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/MapPartitionsFunction.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/MapPartitionsFunction.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/MapPartitionsFunction.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/PairFlatMapFunction.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/PairFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/PairFunction.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/PairFunction.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/PairFunction.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/ReduceFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/ReduceFunction.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/ReduceFunction.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/ReduceFunction.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/VoidFunction.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/VoidFunction.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/VoidFunction.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/VoidFunction.java 
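Editor's note: the renames above only relocate the `org.apache.spark.api.java.function` interfaces into the new `common/utils-java` module; their packages and method signatures are unchanged. As a reminder of what is being moved, each of them is a single-abstract-method interface that user code can supply as a lambda. The snippet is an illustrative sketch, not part of the patch:

    import org.apache.spark.api.java.function.MapFunction;

    public class MapFunctionExample {
      public static void main(String[] args) throws Exception {
        // MapFunction<T, U> declares a single method: U call(T value) throws Exception,
        // so a method reference satisfies it directly.
        MapFunction<String, Integer> lengthOf = String::length;
        System.out.println(lengthOf.call("spark")); // prints 5
      }
    }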
diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/VoidFunction2.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/VoidFunction2.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/VoidFunction2.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/VoidFunction2.java diff --git a/common/utils/src/main/java/org/apache/spark/api/java/function/package-info.java b/common/utils-java/src/main/java/org/apache/spark/api/java/function/package-info.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/api/java/function/package-info.java rename to common/utils-java/src/main/java/org/apache/spark/api/java/function/package-info.java diff --git a/common/utils-java/src/main/java/org/apache/spark/internal/LogKey.java b/common/utils-java/src/main/java/org/apache/spark/internal/LogKey.java new file mode 100644 index 0000000000000..0bd0fecb43976 --- /dev/null +++ b/common/utils-java/src/main/java/org/apache/spark/internal/LogKey.java @@ -0,0 +1,52 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.internal; + +/** + * All structured logging `keys` used in `MDC` must be extends `LogKey` + *

+ *
+ * `LogKey`s serve as identifiers for mapped diagnostic contexts (MDC) within logs.
+ * Follow these guidelines when adding a new LogKey:
+ * <ul>
+ *   <li>
+ *     Define all structured logging keys in `LogKeys.java`, and sort them alphabetically for
+ *     ease of search.
+ *   </li>
+ *   <li>
+ *     Use `UPPER_SNAKE_CASE` for key names.
+ *   </li>
+ *   <li>
+ *     Key names should be both simple and broad, yet include specific identifiers like `STAGE_ID`,
+ *     `TASK_ID`, and `JOB_ID` when needed for clarity. For instance, use `MAX_ATTEMPTS` as a
+ *     general key instead of creating separate keys for each scenario such as
+ *     `EXECUTOR_STATE_SYNC_MAX_ATTEMPTS` and `MAX_TASK_FAILURES`.
+ *     This balances simplicity with the detail needed for effective logging.
+ *   </li>
+ *   <li>
+ *     Use abbreviations in names if they are widely understood,
+ *     such as `APP_ID` for APPLICATION_ID, and `K8S` for KUBERNETES.
+ *   </li>
+ *   <li>
+ *     For time-related keys, use milliseconds as the unit of time.
+ *   </li>
+ * </ul>
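+ * <p>
+ * Illustrative note from the editor (not part of the upstream Javadoc): `LogKeys` is an enum,
+ * so every constant already satisfies this interface through the enum's built-in `name()`
+ * method, which supplies the key's MDC identifier:
+ * <pre>{@code
+ *   LogKey key = LogKeys.EXECUTOR_ID; // enum constant implementing LogKey
+ *   String mdcField = key.name();     // "EXECUTOR_ID"
+ * }</pre>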
+ */ +public interface LogKey { + String name(); +} diff --git a/common/utils-java/src/main/java/org/apache/spark/internal/LogKeys.java b/common/utils-java/src/main/java/org/apache/spark/internal/LogKeys.java new file mode 100644 index 0000000000000..e90683a205752 --- /dev/null +++ b/common/utils-java/src/main/java/org/apache/spark/internal/LogKeys.java @@ -0,0 +1,890 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.internal; + +/** + * Various keys used for mapped diagnostic contexts(MDC) in logging. All structured logging keys + * should be defined here for standardization. + */ +public enum LogKeys implements LogKey { + ACCUMULATOR_ID, + ACL_ENABLED, + ACTUAL_NUM_FILES, + ACTUAL_PARTITION_COLUMN, + ADDED_JARS, + ADMIN_ACLS, + ADMIN_ACL_GROUPS, + ADVISORY_TARGET_SIZE, + AGGREGATE_FUNCTIONS, + ALIGNED_FROM_TIME, + ALIGNED_TO_TIME, + ALPHA, + ANALYSIS_ERROR, + ANTLR_DFA_CACHE_DELTA, + ANTLR_DFA_CACHE_SIZE, + APP_ATTEMPT_ID, + APP_ATTEMPT_SHUFFLE_MERGE_ID, + APP_DESC, + APP_EXECUTOR_ID, + APP_ID, + APP_NAME, + APP_STATE, + ARCHIVE_NAME, + ARGS, + ARTIFACTS, + ARTIFACT_ID, + ATTRIBUTE_MAP, + AUTH_ENABLED, + AVG_BATCH_PROC_TIME, + BACKUP_FILE, + BARRIER_EPOCH, + BARRIER_ID, + BATCH_ID, + BATCH_NAME, + BATCH_TIMES, + BATCH_TIMESTAMP, + BATCH_WRITE, + BIND_ADDRESS, + BLOCK_GENERATOR_STATUS, + BLOCK_ID, + BLOCK_IDS, + BLOCK_MANAGER_ID, + BLOCK_MANAGER_IDS, + BLOCK_TYPE, + BOOT, + BOOTSTRAP_TIME, + BOOT_TIME, + BROADCAST, + BROADCAST_ID, + BROADCAST_OUTPUT_STATUS_SIZE, + BUCKET, + BYTECODE_SIZE, + BYTE_BUFFER, + BYTE_SIZE, + CACHED_TABLE_PARTITION_METADATA_SIZE, + CACHE_AUTO_REMOVED_SIZE, + CACHE_SIZE, + CACHE_UNTIL_HIGHEST_CONSUMED_SIZE, + CACHE_UNTIL_LAST_PRODUCED_SIZE, + CALL_SITE_LONG_FORM, + CALL_SITE_SHORT_FORM, + CANCEL_FUTURE_JOBS, + CATALOG_NAME, + CATEGORICAL_FEATURES, + CHECKPOINT_FILE, + CHECKPOINT_INTERVAL, + CHECKPOINT_LOCATION, + CHECKPOINT_PATH, + CHECKPOINT_ROOT, + CHECKPOINT_TIME, + CHOSEN_WATERMARK, + CLASSIFIER, + CLASS_LOADER, + CLASS_NAME, + CLASS_PATH, + CLASS_PATHS, + CLAUSES, + CLEANUP_LOCAL_DIRS, + CLUSTER_CENTROIDS, + CLUSTER_ID, + CLUSTER_LABEL, + CLUSTER_LEVEL, + CLUSTER_WEIGHT, + CODE, + CODEC_LEVEL, + CODEC_NAME, + CODEGEN_STAGE_ID, + COLUMN_DATA_TYPE_SOURCE, + COLUMN_DATA_TYPE_TARGET, + COLUMN_DEFAULT_VALUE, + COLUMN_NAME, + COLUMN_NAMES, + COMMAND, + COMMAND_OUTPUT, + COMMITTED_VERSION, + COMPACT_INTERVAL, + COMPONENT, + COMPUTE, + CONFIG, + CONFIG2, + CONFIG3, + CONFIG4, + CONFIG5, + CONFIG_DEPRECATION_MESSAGE, + CONFIG_KEY_UPDATED, + CONFIG_VERSION, + CONSUMER, + CONTAINER, + CONTAINER_ID, + CONTAINER_STATE, + CONTEXT, + COST, + COUNT, + CREATED_POOL_NAME, + CREATION_SITE, + CREDENTIALS_RENEWAL_INTERVAL_RATIO, + CROSS_VALIDATION_METRIC, + CROSS_VALIDATION_METRICS, + 
CSV_HEADER_COLUMN_NAME, + CSV_HEADER_COLUMN_NAMES, + CSV_HEADER_LENGTH, + CSV_SCHEMA_FIELD_NAME, + CSV_SCHEMA_FIELD_NAMES, + CSV_SOURCE, + CURRENT_BATCH_ID, + CURRENT_DISK_SIZE, + CURRENT_FILE, + CURRENT_MEMORY_SIZE, + CURRENT_PATH, + CURRENT_TIME, + DATA, + DATABASE_NAME, + DATAFRAME_CACHE_ENTRY, + DATAFRAME_ID, + DATA_FILE, + DATA_SOURCE, + DATA_SOURCES, + DEFAULT_COMPACT_INTERVAL, + DEFAULT_ISOLATION_LEVEL, + DEFAULT_NAME, + DEFAULT_VALUE, + DELAY, + DELEGATE, + DELTA, + DEPRECATED_KEY, + DERIVATIVE, + DESCRIPTION, + DESIRED_NUM_PARTITIONS, + DESIRED_TREE_DEPTH, + DESTINATION_PATH, + DFS_FILE, + DIFF_DELTA, + DIVISIBLE_CLUSTER_INDICES_SIZE, + DRIVER_ID, + DRIVER_JVM_MEMORY, + DRIVER_MEMORY_SIZE, + DRIVER_STATE, + DROPPED_PARTITIONS, + DSTREAM, + DURATION, + EARLIEST_LOADED_VERSION, + EFFECTIVE_STORAGE_LEVEL, + ELAPSED_TIME, + ENCODING, + ENDPOINT_NAME, + END_INDEX, + END_POINT, + END_VERSION, + ENGINE, + EPOCH, + ERROR, + ESTIMATOR_PARAM_MAP, + EVALUATED_FILTERS, + EVENT, + EVENT_LOG_DESTINATION, + EVENT_LOOP, + EVENT_NAME, + EVENT_QUEUE, + EXCEPTION, + EXECUTE_INFO, + EXECUTE_KEY, + EXECUTION_MEMORY_SIZE, + EXECUTION_PLAN_LEAVES, + EXECUTOR_BACKEND, + EXECUTOR_ENVS, + EXECUTOR_ENV_REGEX, + EXECUTOR_ID, + EXECUTOR_IDS, + EXECUTOR_LAUNCH_COMMANDS, + EXECUTOR_MEMORY_OFFHEAP, + EXECUTOR_MEMORY_OVERHEAD_SIZE, + EXECUTOR_MEMORY_SIZE, + EXECUTOR_RESOURCES, + EXECUTOR_SHUFFLE_INFO, + EXECUTOR_STATE, + EXECUTOR_TIMEOUT, + EXECUTOR_USER_CLASS_PATH_FIRST, + EXEC_AMOUNT, + EXISTING_FILE, + EXISTING_PATH, + EXIT_CODE, + EXPECTED_NUM_FILES, + EXPECTED_PARTITION_COLUMN, + EXPIRY_TIMESTAMP, + EXPR, + EXPR_TERMS, + EXTENDED_EXPLAIN_GENERATOR, + FAILED_STAGE, + FAILED_STAGE_NAME, + FAILURES, + FALLBACK_VERSION, + FEATURE_COLUMN, + FEATURE_DIMENSION, + FEATURE_NAME, + FETCH_SIZE, + FIELD_NAME, + FIELD_TYPE, + FILES, + FILE_ABSOLUTE_PATH, + FILE_END_OFFSET, + FILE_FORMAT, + FILE_FORMAT2, + FILE_LENGTH_XATTR, + FILE_MODIFICATION_TIME, + FILE_NAME, + FILE_NAME2, + FILE_NAME3, + FILE_NAMES, + FILE_START_OFFSET, + FILE_SYSTEM, + FILE_VERSION, + FILTER, + FINAL_CONTEXT, + FINAL_OUTPUT_PATH, + FINAL_PATH, + FINISH_TIME, + FINISH_TRIGGER_DURATION, + FLOW_NAME, + FREE_MEMORY_SIZE, + FROM_OFFSET, + FROM_TIME, + FS_DATA_OUTPUT_STREAM, + FUNCTION_NAME, + FUNCTION_PARAM, + GLOBAL_INIT_FILE, + GLOBAL_WATERMARK, + GROUP_BY_EXPRS, + GROUP_ID, + HADOOP_VERSION, + HASH_JOIN_KEYS, + HASH_MAP_SIZE, + HEARTBEAT, + HEARTBEAT_INTERVAL, + HISTORY_DIR, + HIVE_CLIENT_VERSION, + HIVE_METASTORE_VERSION, + HIVE_OPERATION_STATE, + HIVE_OPERATION_TYPE, + HOST, + HOSTS, + HOST_LOCAL_BLOCKS_SIZE, + HOST_PORT, + HOST_PORT2, + HUGE_METHOD_LIMIT, + HYBRID_STORE_DISK_BACKEND, + IDENTIFIER, + INCOMPATIBLE_TYPES, + INDEX, + INDEX_FILE, + INDEX_NAME, + INFERENCE_MODE, + INIT, + INITIAL_CAPACITY, + INITIAL_HEARTBEAT_INTERVAL, + INIT_MODE, + INIT_TIME, + INPUT, + INPUT_SPLIT, + INTEGRAL, + INTERRUPT_THREAD, + INTERVAL, + INVALID_PARAMS, + ISOLATION_LEVEL, + ISSUE_DATE, + IS_NETWORK_REQUEST_DONE, + JAR_ENTRY, + JAR_MESSAGE, + JAR_URL, + JAVA_VERSION, + JAVA_VM_NAME, + JOB_ID, + JOIN_CONDITION, + JOIN_CONDITION_SUB_EXPR, + JOIN_TYPE, + K8S_CONTEXT, + KEY, + KEY2, + KEYTAB, + KEYTAB_FILE, + KILL_EXECUTORS, + KINESIS_REASON, + LABEL_COLUMN, + LARGEST_CLUSTER_INDEX, + LAST_ACCESS_TIME, + LAST_COMMITTED_CHECKPOINT_ID, + LAST_COMMIT_BASED_CHECKPOINT_ID, + LAST_SCAN_TIME, + LAST_VALID_TIME, + LATEST_BATCH_ID, + LATEST_COMMITTED_BATCH_ID, + LATEST_SHUFFLE_MERGE_ID, + LEARNING_RATE, + LEFT_EXPR, + LEFT_LOGICAL_PLAN_STATS_SIZE_IN_BYTES, + LINE, + LINEAGE, 
+ LINE_NUM, + LISTENER, + LOADED_CHECKPOINT_ID, + LOADED_VERSION, + LOAD_FACTOR, + LOAD_TIME, + LOCALE, + LOCAL_BLOCKS_SIZE, + LOCAL_SCRATCH_DIR, + LOCATION, + LOGICAL_PLAN, + LOGICAL_PLAN_COLUMNS, + LOGICAL_PLAN_LEAVES, + LOG_ID, + LOG_LEVEL, + LOG_OFFSET, + LOG_TYPE, + LOSSES, + LOWER_BOUND, + MALFORMATTED_STRING, + MAP_ID, + MASTER_URL, + MAX_ATTEMPTS, + MAX_CACHE_UNTIL_HIGHEST_CONSUMED_SIZE, + MAX_CACHE_UNTIL_LAST_PRODUCED_SIZE, + MAX_CAPACITY, + MAX_CATEGORIES, + MAX_EXECUTOR_FAILURES, + MAX_FILE_VERSION, + MAX_JVM_METHOD_PARAMS_LENGTH, + MAX_MEMORY_SIZE, + MAX_METHOD_CODE_SIZE, + MAX_NUM_BINS, + MAX_NUM_CHUNKS, + MAX_NUM_FILES, + MAX_NUM_LOG_POLICY, + MAX_NUM_PARTITIONS, + MAX_NUM_POSSIBLE_BINS, + MAX_NUM_ROWS_IN_MEMORY_BUFFER, + MAX_SEEN_VERSION, + MAX_SERVICE_NAME_LENGTH, + MAX_SIZE, + MAX_SLOTS, + MAX_SPLIT_BYTES, + MAX_TABLE_PARTITION_METADATA_SIZE, + MEMORY_CONSUMER, + MEMORY_POOL_NAME, + MEMORY_SIZE, + MEMORY_THRESHOLD_SIZE, + MERGE_DIR_NAME, + MESSAGE, + METADATA, + METADATA_DIRECTORY, + METADATA_JSON, + META_FILE, + METHOD_NAME, + METHOD_PARAM_TYPES, + METRICS_JSON, + METRIC_NAME, + MINI_BATCH_FRACTION, + MIN_COMPACTION_BATCH_ID, + MIN_NUM_FREQUENT_PATTERN, + MIN_POINT_PER_CLUSTER, + MIN_RATE, + MIN_SEEN_VERSION, + MIN_SHARE, + MIN_SIZE, + MIN_TIME, + MIN_VERSIONS_TO_DELETE, + MIN_VERSION_NUM, + MISSING_PARENT_STAGES, + MODEL_WEIGHTS, + MODIFY_ACLS, + MODIFY_ACLS_GROUPS, + MODULE_NAME, + NAME, + NAMESPACE, + NETWORK_IF, + NEW_FEATURE_COLUMN_NAME, + NEW_LABEL_COLUMN_NAME, + NEW_PATH, + NEW_RDD_ID, + NEW_STATE, + NEW_VALUE, + NEXT_RENEWAL_TIME, + NODES, + NODE_LOCATION, + NON_BUILT_IN_CONNECTORS, + NORM, + NUM_ADDED_PARTITIONS, + NUM_APPS, + NUM_ATTEMPT, + NUM_BATCHES, + NUM_BIN, + NUM_BLOCKS, + NUM_BLOCK_IDS, + NUM_BROADCAST_BLOCK, + NUM_BYTES, + NUM_BYTES_CURRENT, + NUM_BYTES_EVICTED, + NUM_BYTES_MAX, + NUM_BYTES_TO_FREE, + NUM_BYTES_TO_WARN, + NUM_BYTES_USED, + NUM_CATEGORIES, + NUM_CHECKSUM_FILE, + NUM_CHUNKS, + NUM_CLASSES, + NUM_COEFFICIENTS, + NUM_COLUMNS, + NUM_CONCURRENT_WRITER, + NUM_CORES, + NUM_DATA_FILE, + NUM_DATA_FILES, + NUM_DECOMMISSIONED, + NUM_DRIVERS, + NUM_DROPPED_PARTITIONS, + NUM_EFFECTIVE_RULE_OF_RUNS, + NUM_ELEMENTS_SPILL_RECORDS, + NUM_ELEMENTS_SPILL_THRESHOLD, + NUM_EVENTS, + NUM_EXAMPLES, + NUM_EXECUTORS, + NUM_EXECUTORS_EXITED, + NUM_EXECUTORS_KILLED, + NUM_EXECUTOR_CORES, + NUM_EXECUTOR_CORES_REMAINING, + NUM_EXECUTOR_CORES_TOTAL, + NUM_EXECUTOR_DESIRED, + NUM_EXECUTOR_LAUNCH, + NUM_EXECUTOR_TARGET, + NUM_FAILURES, + NUM_FEATURES, + NUM_FILES, + NUM_FILES_COPIED, + NUM_FILES_FAILED_TO_DELETE, + NUM_FILES_REUSED, + NUM_FREQUENT_ITEMS, + NUM_HOST_LOCAL_BLOCKS, + NUM_INDEX_FILE, + NUM_INDEX_FILES, + NUM_ITERATIONS, + NUM_KAFKA_PULLS, + NUM_KAFKA_RECORDS_PULLED, + NUM_LAGGING_STORES, + NUM_LEADING_SINGULAR_VALUES, + NUM_LEFT_PARTITION_VALUES, + NUM_LOADED_ENTRIES, + NUM_LOCAL_BLOCKS, + NUM_LOCAL_DIRS, + NUM_LOCAL_FREQUENT_PATTERN, + NUM_MERGERS, + NUM_MERGER_LOCATIONS, + NUM_META_FILES, + NUM_NODES, + NUM_PARTITIONS, + NUM_PARTITIONS2, + NUM_PATHS, + NUM_PEERS, + NUM_PEERS_REPLICATED_TO, + NUM_PEERS_TO_REPLICATE_TO, + NUM_PENDING_LAUNCH_TASKS, + NUM_POD, + NUM_POD_SHARED_SLOT, + NUM_POD_TARGET, + NUM_POINT, + NUM_PREFIXES, + NUM_PRUNED, + NUM_PUSH_MERGED_LOCAL_BLOCKS, + NUM_RECEIVERS, + NUM_RECORDS_READ, + NUM_RELEASED_LOCKS, + NUM_REMAINED, + NUM_REMOTE_BLOCKS, + NUM_REMOVED_WORKERS, + NUM_REPLICAS, + NUM_REQUESTS, + NUM_REQUEST_SYNC_TASK, + NUM_RESOURCE_SLOTS, + NUM_RETRIES, + NUM_RETRY, + NUM_RIGHT_PARTITION_VALUES, + NUM_ROWS, + NUM_RULE_OF_RUNS, + 
NUM_SEQUENCES, + NUM_SKIPPED, + NUM_SLOTS, + NUM_SPILLS, + NUM_SPILL_WRITERS, + NUM_SUB_DIRS, + NUM_SUCCESSFUL_TASKS, + NUM_TASKS, + NUM_TASK_CPUS, + NUM_TRAIN_WORD, + NUM_UNFINISHED_DECOMMISSIONED, + NUM_VERSIONS_RETAIN, + NUM_WEIGHTED_EXAMPLES, + NUM_WORKERS, + OBJECT_AGG_SORT_BASED_FALLBACK_THRESHOLD, + OBJECT_ID, + OFFSET, + OFFSETS, + OFFSET_SEQUENCE_METADATA, + OLD_BLOCK_MANAGER_ID, + OLD_GENERATION_GC, + OLD_VALUE, + OPEN_COST_IN_BYTES, + OPERATION, + OPERATION_HANDLE, + OPERATION_HANDLE_ID, + OPERATION_ID, + OPTIMIZED_PLAN_COLUMNS, + OPTIMIZER_CLASS_NAME, + OPTIONS, + OP_ID, + OP_TYPE, + ORIGINAL_DISK_SIZE, + ORIGINAL_MEMORY_SIZE, + OS_ARCH, + OS_NAME, + OS_VERSION, + OUTPUT, + OUTPUT_BUFFER, + OVERHEAD_MEMORY_SIZE, + PAGE_SIZE, + PARENT_STAGES, + PARSE_MODE, + PARTITIONED_FILE_READER, + PARTITIONER, + PARTITION_ID, + PARTITION_IDS, + PARTITION_SIZE, + PARTITION_SPECIFICATION, + PARTITION_SPECS, + PATH, + PATHS, + PEER, + PENDING_TIMES, + PERCENT, + PIPELINE_STAGE_UID, + PLUGIN_NAME, + POD_ID, + POD_NAME, + POD_NAMESPACE, + POD_PHASE, + POD_STATE, + POINT_OF_CENTER, + POLICY, + POOL_NAME, + PORT, + PORT2, + POST_SCAN_FILTERS, + PREDICATE, + PREDICATES, + PREFERRED_SERVICE_NAME, + PREFIX, + PRETTY_ID_STRING, + PRINCIPAL, + PROCESS, + PROCESSING_TIME, + PRODUCER_ID, + PROPERTY_NAME, + PROPORTIONAL, + PROTOCOL_VERSION, + PROVIDER, + PUSHED_FILTERS, + PUSH_MERGED_LOCAL_BLOCKS_SIZE, + PVC_METADATA_NAME, + PYTHON_EXEC, + PYTHON_PACKAGES, + PYTHON_VERSION, + PYTHON_WORKER_CHANNEL_IS_BLOCKING_MODE, + PYTHON_WORKER_CHANNEL_IS_CONNECTED, + PYTHON_WORKER_HAS_INPUTS, + PYTHON_WORKER_IDLE_TIMEOUT, + PYTHON_WORKER_IS_ALIVE, + PYTHON_WORKER_MODULE, + PYTHON_WORKER_RESPONSE, + PYTHON_WORKER_SELECTION_KEY_INTERESTS, + PYTHON_WORKER_SELECTION_KEY_IS_VALID, + PYTHON_WORKER_SELECTOR_IS_OPEN, + QUANTILES, + QUERY_CACHE_VALUE, + QUERY_HINT, + QUERY_ID, + QUERY_PLAN, + QUERY_PLAN_COMPARISON, + QUERY_PLAN_LENGTH_ACTUAL, + QUERY_PLAN_LENGTH_MAX, + QUERY_RUN_ID, + RANGE, + RATE_LIMIT, + RATIO, + RDD, + RDD_CHECKPOINT_DIR, + RDD_DEBUG_STRING, + RDD_DESCRIPTION, + RDD_ID, + READ_LIMIT, + REASON, + REATTACHABLE, + RECEIVED_BLOCK_INFO, + RECEIVED_BLOCK_TRACKER_LOG_EVENT, + RECEIVER_ID, + RECEIVER_IDS, + RECORDS, + RECOVERY_STATE, + RECURSIVE_DEPTH, + REDACTED_STATEMENT, + REDUCE_ID, + REGEX, + REGISTERED_EXECUTOR_FILE, + REGISTER_MERGE_RESULTS, + RELATION_NAME, + RELATION_OUTPUT, + RELATIVE_TOLERANCE, + RELEASED_LOCKS, + REMAINING_PARTITIONS, + REMOTE_ADDRESS, + REMOTE_BLOCKS_SIZE, + REMOVE_FROM_MASTER, + REPORT_DETAILS, + REQUESTER_SIZE, + REQUEST_EXECUTORS, + REQUEST_ID, + RESOURCE, + RESOURCE_NAME, + RESOURCE_PROFILE_ID, + RESOURCE_PROFILE_IDS, + RESOURCE_PROFILE_TO_TOTAL_EXECS, + RESPONSE_BODY_SIZE, + RESTART_TIME, + RESULT, + RESULT_SIZE_BYTES, + RESULT_SIZE_BYTES_MAX, + RETRY_INTERVAL, + RETRY_WAIT_TIME, + RIGHT_EXPR, + RIGHT_LOGICAL_PLAN_STATS_SIZE_IN_BYTES, + RMSE, + ROCKS_DB_FILE_MAPPING, + ROCKS_DB_LOG_LEVEL, + ROCKS_DB_LOG_MESSAGE, + RPC_ADDRESS, + RPC_ENDPOINT_REF, + RPC_MESSAGE_CAPACITY, + RPC_SSL_ENABLED, + RULE_EXECUTOR_NAME, + RULE_NAME, + RUN_ID, + RUN_ID_STRING, + SCALA_VERSION, + SCALING_DOWN_RATIO, + SCALING_UP_RATIO, + SCHEDULER_POOL_NAME, + SCHEDULING_MODE, + SCHEMA, + SCHEMA2, + SERVER_NAME, + SERVICE_NAME, + SERVLET_CONTEXT_HANDLER_PATH, + SESSION_HANDLE, + SESSION_HOLD_INFO, + SESSION_ID, + SESSION_KEY, + SET_CLIENT_INFO_REQUEST, + SHARD_ID, + SHORTER_SERVICE_NAME, + SHORT_USER_NAME, + SHUFFLE_BLOCK_INFO, + SHUFFLE_DB_BACKEND_KEY, + SHUFFLE_DB_BACKEND_NAME, + SHUFFLE_ID, + SHUFFLE_IDS, 
+ SHUFFLE_MERGE_ID, + SHUFFLE_MERGE_RECOVERY_FILE, + SHUFFLE_SERVICE_CONF_OVERLAY_URL, + SHUFFLE_SERVICE_METRICS_NAMESPACE, + SHUFFLE_SERVICE_NAME, + SIGMAS_LENGTH, + SIGNAL, + SINK, + SIZE, + SLEEP_TIME, + SLIDE_DURATION, + SMALLEST_CLUSTER_INDEX, + SNAPSHOT_EVENT, + SNAPSHOT_EVENT_TIME_DELTA, + SNAPSHOT_EVENT_VERSION_DELTA, + SNAPSHOT_VERSION, + SOCKET_ADDRESS, + SOURCE, + SOURCE_PATH, + SPARK_BRANCH, + SPARK_BUILD_DATE, + SPARK_BUILD_USER, + SPARK_DATA_STREAM, + SPARK_PLAN_ID, + SPARK_REPO_URL, + SPARK_REVISION, + SPARK_VERSION, + SPILL_RECORDS_SIZE, + SPILL_RECORDS_SIZE_THRESHOLD, + SPILL_TIMES, + SQL_TEXT, + SRC_PATH, + STAGE, + STAGES, + STAGE_ATTEMPT, + STAGE_ATTEMPT_ID, + STAGE_ID, + STAGE_NAME, + STAMP, + START_INDEX, + START_TIME, + STATEMENT_ID, + STATE_NAME, + STATE_STORE_COORDINATOR, + STATE_STORE_ID, + STATE_STORE_PROVIDER, + STATE_STORE_PROVIDER_ID, + STATE_STORE_PROVIDER_IDS, + STATE_STORE_STATE, + STATE_STORE_VERSION, + STATS, + STATUS, + STDERR, + STOP_SITE_SHORT_FORM, + STORAGE_LEVEL, + STORAGE_LEVEL_DESERIALIZED, + STORAGE_LEVEL_REPLICATION, + STORAGE_MEMORY_SIZE, + STORE_ID, + STRATEGY, + STREAMING_CONTEXT, + STREAMING_DATA_SOURCE_DESCRIPTION, + STREAMING_DATA_SOURCE_NAME, + STREAMING_OFFSETS_END, + STREAMING_OFFSETS_START, + STREAMING_QUERY_PROGRESS, + STREAMING_SOURCE, + STREAMING_TABLE, + STREAMING_WRITE, + STREAM_CHUNK_ID, + STREAM_ID, + STREAM_NAME, + SUBMISSION_ID, + SUBSAMPLING_RATE, + SUB_QUERY, + TABLE_NAME, + TABLE_TYPE, + TABLE_TYPES, + TAG, + TARGET_NUM_EXECUTOR, + TARGET_NUM_EXECUTOR_DELTA, + TARGET_PATH, + TARGET_SIZE, + TASK_ATTEMPT_ID, + TASK_ID, + TASK_INDEX, + TASK_LOCALITY, + TASK_NAME, + TASK_REQUIREMENTS, + TASK_RESOURCES, + TASK_RESOURCE_ASSIGNMENTS, + TASK_SET_MANAGER, + TASK_SET_NAME, + TASK_STATE, + TEMP_FILE, + TEMP_OUTPUT_PATH, + TEMP_PATH, + TEST_SIZE, + THREAD, + THREAD_ID, + THREAD_NAME, + THREAD_POOL_KEEPALIVE_TIME, + THREAD_POOL_SIZE, + THREAD_POOL_WAIT_QUEUE_SIZE, + THRESHOLD, + THRESH_TIME, + TIME, + TIMEOUT, + TIMER, + TIMESTAMP, + TIME_UNITS, + TIP, + TOKEN, + TOKEN_KIND, + TOKEN_REGEX, + TOKEN_RENEWER, + TOPIC, + TOPIC_PARTITION, + TOPIC_PARTITIONS, + TOPIC_PARTITION_OFFSET, + TOPIC_PARTITION_OFFSET_RANGE, + TOTAL, + TOTAL_EFFECTIVE_TIME, + TOTAL_SIZE, + TOTAL_TIME, + TOTAL_TIME_READ, + TO_TIME, + TRAINING_SIZE, + TRAIN_VALIDATION_SPLIT_METRIC, + TRAIN_VALIDATION_SPLIT_METRICS, + TRANSFER_TYPE, + TREE_NODE, + TRIGGER_INTERVAL, + UI_ACLS, + UI_FILTER, + UI_FILTER_PARAMS, + UI_PROXY_BASE, + UNKNOWN_PARAM, + UNSUPPORTED_EXPR, + UNSUPPORTED_HINT_REASON, + UNTIL_OFFSET, + UPPER_BOUND, + URI, + URIS, + URL, + URL2, + URLS, + USER_ID, + USER_NAME, + UUID, + VALUE, + VERSIONS_TO_DELETE, + VERSION_NUM, + VIEW_ACLS, + VIEW_ACLS_GROUPS, + VIRTUAL_CORES, + VOCAB_SIZE, + WAIT_RESULT_TIME, + WAIT_SEND_TIME, + WATERMARK_CONSTRAINT, + WEB_URL, + WEIGHT, + WORKER, + WORKER_HOST, + WORKER_ID, + WORKER_PORT, + WORKER_URL, + WRITE_AHEAD_LOG_INFO, + WRITE_AHEAD_LOG_RECORD_HANDLE, + WRITE_JOB_UUID, + XML_SCHEDULING_MODE, + XSD_PATH, + YARN_RESOURCE, + YOUNG_GENERATION_GC, + ZERO_TIME +} diff --git a/common/utils-java/src/main/java/org/apache/spark/internal/MDC.java b/common/utils-java/src/main/java/org/apache/spark/internal/MDC.java new file mode 100644 index 0000000000000..341967812c84b --- /dev/null +++ b/common/utils-java/src/main/java/org/apache/spark/internal/MDC.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.internal; + +/** + * Mapped Diagnostic Context (MDC) that will be used in log messages. + * The values of the MDC will be inline in the log message, while the key-value pairs will be + * part of the ThreadContext. + */ +public record MDC(LogKey key, Object value) { + public static MDC of(LogKey key, Object value) { + return new MDC(key, value); + } +} diff --git a/common/utils/src/main/java/org/apache/spark/internal/SparkLogger.java b/common/utils-java/src/main/java/org/apache/spark/internal/SparkLogger.java similarity index 96% rename from common/utils/src/main/java/org/apache/spark/internal/SparkLogger.java rename to common/utils-java/src/main/java/org/apache/spark/internal/SparkLogger.java index 8c210a4fab3c3..84d6d7cf4238c 100644 --- a/common/utils/src/main/java/org/apache/spark/internal/SparkLogger.java +++ b/common/utils-java/src/main/java/org/apache/spark/internal/SparkLogger.java @@ -18,6 +18,7 @@ package org.apache.spark.internal; import java.util.HashMap; +import java.util.Locale; import java.util.Map; import java.util.function.Consumer; @@ -51,7 +52,7 @@ * * import org.apache.spark.internal.LogKeys; * import org.apache.spark.internal.MDC; - * logger.error("Unable to delete file for partition {}", MDC.of(LogKeys.PARTITION_ID$.MODULE$, i)); + * logger.error("Unable to delete file for partition {}", MDC.of(LogKeys.PARTITION_ID, i)); *
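For reference, a self-contained sketch of the calling pattern the javadoc above describes, using only the APIs visible in this diff (SparkLoggerFactory.getLogger, SparkLogger.error with varargs MDC, MDC.of, and the LogKeys enum); the surrounding class, method, and variable names are hypothetical:

import org.apache.spark.internal.LogKeys;
import org.apache.spark.internal.MDC;
import org.apache.spark.internal.SparkLogger;
import org.apache.spark.internal.SparkLoggerFactory;

// Hypothetical caller illustrating the MDC-based logging pattern above.
class PartitionCleanupExample {
  private static final SparkLogger logger =
      SparkLoggerFactory.getLogger(PartitionCleanupExample.class.getName());

  void reportDeleteFailure(int partitionId) {
    // The MDC value is substituted into the "{}" placeholder; when structured
    // logging is enabled, the lower-cased key name is also added to the log context.
    logger.error("Unable to delete file for partition {}",
        MDC.of(LogKeys.PARTITION_ID, partitionId));
  }
}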

* * Constant String Messages: @@ -65,8 +66,10 @@ * you can define `custom LogKey` and use it in `java` code as follows: *

* - * // To add a `custom LogKey`, implement `LogKey` - * public static class CUSTOM_LOG_KEY implements LogKey { } + * // Add a `CustomLogKeys`, implement `LogKey` + * public enum CustomLogKeys implements LogKey { + * CUSTOM_LOG_KEY + * } * import org.apache.spark.internal.MDC; * logger.error("Unable to delete key {} for cache", MDC.of(CUSTOM_LOG_KEY, "key")); */ @@ -222,8 +225,8 @@ private void withLogContext( for (int index = 0; index < mdcs.length; index++) { MDC mdc = mdcs[index]; String value = (mdc.value() != null) ? mdc.value().toString() : null; - if (Logging$.MODULE$.isStructuredLoggingEnabled()) { - context.put(mdc.key().name(), value); + if (SparkLoggerFactory.isStructuredLoggingEnabled()) { + context.put(mdc.key().name().toLowerCase(Locale.ROOT), value); } args[index] = value; } diff --git a/common/utils/src/main/java/org/apache/spark/internal/SparkLoggerFactory.java b/common/utils-java/src/main/java/org/apache/spark/internal/SparkLoggerFactory.java similarity index 77% rename from common/utils/src/main/java/org/apache/spark/internal/SparkLoggerFactory.java rename to common/utils-java/src/main/java/org/apache/spark/internal/SparkLoggerFactory.java index a59c007362419..f5be570fa5b39 100644 --- a/common/utils/src/main/java/org/apache/spark/internal/SparkLoggerFactory.java +++ b/common/utils-java/src/main/java/org/apache/spark/internal/SparkLoggerFactory.java @@ -23,6 +23,20 @@ public class SparkLoggerFactory { + private static volatile boolean structuredLoggingEnabled = false; + + public static void enableStructuredLogging() { + structuredLoggingEnabled = true; + } + + public static void disableStructuredLogging() { + structuredLoggingEnabled = false; + } + + public static boolean isStructuredLoggingEnabled() { + return structuredLoggingEnabled; + } + public static SparkLogger getLogger(String name) { return new SparkLogger(LoggerFactory.getLogger(name)); } diff --git a/common/utils/src/main/java/org/apache/spark/memory/MemoryMode.java b/common/utils-java/src/main/java/org/apache/spark/memory/MemoryMode.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/memory/MemoryMode.java rename to common/utils-java/src/main/java/org/apache/spark/memory/MemoryMode.java diff --git a/common/utils/src/main/java/org/apache/spark/network/util/ByteUnit.java b/common/utils-java/src/main/java/org/apache/spark/network/util/ByteUnit.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/network/util/ByteUnit.java rename to common/utils-java/src/main/java/org/apache/spark/network/util/ByteUnit.java diff --git a/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java b/common/utils-java/src/main/java/org/apache/spark/network/util/JavaUtils.java similarity index 55% rename from common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java rename to common/utils-java/src/main/java/org/apache/spark/network/util/JavaUtils.java index 94f9f02ed2c9b..cf500926fa3aa 100644 --- a/common/utils/src/main/java/org/apache/spark/network/util/JavaUtils.java +++ b/common/utils-java/src/main/java/org/apache/spark/network/util/JavaUtils.java @@ -18,18 +18,26 @@ package org.apache.spark.network.util; import java.io.*; +import java.net.URL; import java.nio.ByteBuffer; import java.nio.channels.ReadableByteChannel; import java.nio.charset.StandardCharsets; +import java.nio.file.DirectoryStream; import java.nio.file.Files; import java.nio.file.LinkOption; +import java.nio.file.Path; +import java.nio.file.FileVisitOption; +import 
java.nio.file.FileVisitResult; +import java.nio.file.SimpleFileVisitor; +import java.nio.file.StandardCopyOption; import java.nio.file.attribute.BasicFileAttributes; import java.util.*; import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicLong; import java.util.regex.Matcher; import java.util.regex.Pattern; - -import org.apache.commons.lang3.SystemUtils; +import java.util.stream.Stream; +import java.util.stream.Collectors; import org.apache.spark.internal.SparkLogger; import org.apache.spark.internal.SparkLoggerFactory; @@ -60,6 +68,109 @@ public static void closeQuietly(Closeable closeable) { } } + /** Delete a file or directory and its contents recursively without throwing exceptions. */ + public static void deleteQuietly(File file) { + if (file != null && file.exists()) { + Path path = file.toPath(); + try (Stream<Path> walk = Files.walk(path)) { + walk.sorted(Comparator.reverseOrder()).map(Path::toFile).forEach(File::delete); + } catch (Exception ignored) { /* No-op */ } + } + } + + /** Registers the file or directory for deletion when the JVM exits. */ + public static void forceDeleteOnExit(File file) throws IOException { + if (file != null && file.exists()) { + if (!file.isDirectory()) { + file.deleteOnExit(); + } else { + Path path = file.toPath(); + Files.walkFileTree(path, new SimpleFileVisitor<Path>() { + @Override + public FileVisitResult preVisitDirectory(Path p, BasicFileAttributes a) + throws IOException { + p.toFile().deleteOnExit(); + return a.isSymbolicLink() ? FileVisitResult.SKIP_SUBTREE : FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult visitFile(Path p, BasicFileAttributes a) throws IOException { + p.toFile().deleteOnExit(); + return FileVisitResult.CONTINUE; + } + }); + } + } + } + + /** Move a file from src to dst. */ + public static void moveFile(File src, File dst) throws IOException { + if (src == null || dst == null || !src.exists() || src.isDirectory() || dst.exists()) { + throw new IllegalArgumentException("Invalid input " + src + " or " + dst); + } + if (!src.renameTo(dst)) { // Try to use File.renameTo first + Files.move(src.toPath(), dst.toPath()); + } + } + + /** Move a directory from src to dst. */ + public static void moveDirectory(File src, File dst) throws IOException { + if (src == null || dst == null || !src.exists() || !src.isDirectory() || dst.exists()) { + throw new IllegalArgumentException("Invalid input " + src + " or " + dst); + } + if (!src.renameTo(dst)) { + Path from = src.toPath().toAbsolutePath().normalize(); + Path to = dst.toPath().toAbsolutePath().normalize(); + if (to.startsWith(from)) { + throw new IllegalArgumentException("Cannot move directory to itself or its subdirectory"); + } + moveDirectory(from, to); + } + } + + private static void moveDirectory(Path src, Path dst) throws IOException { + Files.createDirectories(dst); + try (DirectoryStream<Path> stream = Files.newDirectoryStream(src)) { + for (Path from : stream) { + Path to = dst.resolve(from.getFileName()); + if (Files.isDirectory(from)) { + moveDirectory(from, to); + } else { + Files.move(from, to, StandardCopyOption.REPLACE_EXISTING); + } + } + } + Files.delete(src); + } + + /** Copy src into the target directory recursively. File attribute times are not copied.
*/ + public static void copyDirectory(File src, File dst) throws IOException { + if (src == null || dst == null || !src.exists() || !src.isDirectory() || + (dst.exists() && !dst.isDirectory())) { + throw new IllegalArgumentException("Invalid input file " + src + " or directory " + dst); + } + Path from = src.toPath().toAbsolutePath().normalize(); + Path to = dst.toPath().toAbsolutePath().normalize(); + if (to.startsWith(from)) { + throw new IllegalArgumentException("Cannot copy directory to itself or its subdirectory"); + } + Files.createDirectories(to); + Files.walkFileTree(from, new SimpleFileVisitor() { + @Override + public FileVisitResult preVisitDirectory(Path dir, BasicFileAttributes attrs) + throws IOException { + Files.createDirectories(to.resolve(from.relativize(dir))); + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { + Files.copy(file, to.resolve(from.relativize(file)), StandardCopyOption.REPLACE_EXISTING); + return FileVisitResult.CONTINUE; + } + }); + } + /** Returns a hash consistent with Spark's Utils.nonNegativeHash(). */ public static int nonNegativeHash(Object obj) { if (obj == null) { return 0; } @@ -83,6 +194,49 @@ public static String bytesToString(ByteBuffer b) { return StandardCharsets.UTF_8.decode(b.slice()).toString(); } + public static long sizeOf(File file) throws IOException { + if (!file.exists()) { + throw new IllegalArgumentException(file.getAbsolutePath() + " not found"); + } + return sizeOf(file.toPath()); + } + + public static long sizeOf(Path dirPath) throws IOException { + AtomicLong size = new AtomicLong(0); + Files.walkFileTree(dirPath, new SimpleFileVisitor() { + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { + size.addAndGet(attrs.size()); + return FileVisitResult.CONTINUE; + } + }); + return size.get(); + } + + public static void cleanDirectory(File dir) throws IOException { + if (dir == null || !dir.exists() || !dir.isDirectory()) { + throw new IllegalArgumentException("Invalid input directory " + dir); + } + cleanDirectory(dir.toPath()); + } + + private static void cleanDirectory(Path rootDir) throws IOException { + Files.walkFileTree(rootDir, new SimpleFileVisitor() { + @Override + public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException { + Files.delete(file); + return FileVisitResult.CONTINUE; + } + + @Override + public FileVisitResult postVisitDirectory(Path dir, IOException e) throws IOException { + if (e != null) throw e; + if (!dir.equals(rootDir)) Files.delete(dir); + return FileVisitResult.CONTINUE; + } + }); + } + /** * Delete a file or directory and its contents recursively. * Don't follow directories if they are symlinks. @@ -110,14 +264,13 @@ public static void deleteRecursively(File file, FilenameFilter filter) // On Unix systems, use operating system command to run faster // If that does not work out, fallback to the Java IO way // We exclude Apple Silicon test environment due to the limited resource issues. - if (SystemUtils.IS_OS_UNIX && filter == null && !(SystemUtils.IS_OS_MAC_OSX && - (System.getenv("SPARK_TESTING") != null || System.getProperty("spark.testing") != null))) { + if (isUnix && filter == null && !(isMac && isTesting())) { try { deleteRecursivelyUsingUnixNative(file); return; } catch (IOException e) { logger.warn("Attempt to delete using native Unix OS command failed for path = {}. 
" + - "Falling back to Java IO way", e, MDC.of(LogKeys.PATH$.MODULE$, file.getAbsolutePath())); + "Falling back to Java IO way", e, MDC.of(LogKeys.PATH, file.getAbsolutePath())); } } @@ -212,6 +365,25 @@ private static File[] listFilesSafely(File file, FilenameFilter filter) throws I } } + public static Set listPaths(File dir) throws IOException { + if (dir == null) throw new IllegalArgumentException("Input directory is null"); + if (!dir.exists() || !dir.isDirectory()) return Collections.emptySet(); + try (var stream = Files.walk(dir.toPath(), FileVisitOption.FOLLOW_LINKS)) { + return stream.filter(Files::isRegularFile).collect(Collectors.toCollection(HashSet::new)); + } + } + + public static Set listFiles(File dir) throws IOException { + if (dir == null) throw new IllegalArgumentException("Input directory is null"); + if (!dir.exists() || !dir.isDirectory()) return Collections.emptySet(); + try (var stream = Files.walk(dir.toPath(), FileVisitOption.FOLLOW_LINKS)) { + return stream + .filter(Files::isRegularFile) + .map(Path::toFile) + .collect(Collectors.toCollection(HashSet::new)); + } + } + private static final Map timeSuffixes; private static final Map byteSuffixes; @@ -415,7 +587,7 @@ public static File createDirectory(String root, String namePrefix) throws IOExce dir = new File(root, namePrefix + "-" + UUID.randomUUID()); Files.createDirectories(dir.toPath()); } catch (IOException | SecurityException e) { - logger.error("Failed to create directory {}", e, MDC.of(LogKeys.PATH$.MODULE$, dir)); + logger.error("Failed to create directory {}", e, MDC.of(LogKeys.PATH, dir)); dir = null; } } @@ -435,4 +607,154 @@ public static void readFully(ReadableByteChannel channel, ByteBuffer dst) throws } } + /** + * Read len bytes exactly, otherwise throw exceptions. + */ + public static void readFully(InputStream in, byte[] arr, int off, int len) throws IOException { + if (in == null || len < 0 || (off < 0 || off > arr.length - len)) { + throw new IllegalArgumentException("Invalid input argument"); + } + if (len != in.readNBytes(arr, off, len)) { + throw new EOFException("Fail to read " + len + " bytes."); + } + } + + /** + * Copy the content of a URL into a file. + */ + public static void copyURLToFile(URL url, File file) throws IOException { + if (url == null || file == null || (file.exists() && file.isDirectory())) { + throw new IllegalArgumentException("Invalid input " + url + " or " + file); + } + Files.createDirectories(file.getParentFile().toPath()); + try (InputStream in = url.openStream()) { + Files.copy(in, file.toPath(), StandardCopyOption.REPLACE_EXISTING); + } + } + + public static String join(List arr, String sep) { + if (arr == null) return ""; + StringJoiner joiner = new StringJoiner(sep == null ? "" : sep); + for (Object a : arr) { + joiner.add(a == null ? 
"" : a.toString()); + } + return joiner.toString(); + } + + public static String stackTraceToString(Throwable t) { + if (t == null) { + return ""; + } + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + try (PrintWriter writer = new PrintWriter(out)) { + t.printStackTrace(writer); + writer.flush(); + } + return out.toString(StandardCharsets.UTF_8); + } + + public static int checkedCast(long value) { + if (value > Integer.MAX_VALUE || value < Integer.MIN_VALUE) { + throw new IllegalArgumentException("Cannot cast to integer."); + } + return (int) value; + } + + /** Return true if the content of the files are equal or they both don't exist */ + public static boolean contentEquals(File file1, File file2) throws IOException { + if (file1 == null && file2 != null || file1 != null && file2 == null) { + return false; + } else if (file1 == null && file2 == null || !file1.exists() && !file2.exists()) { + return true; + } else if (!file1.exists() || !file2.exists()) { + return false; + } else if (file1.isDirectory() || file2.isDirectory()) { + throw new IllegalArgumentException("Input is not a file: %s or %s".formatted(file1, file2)); + } else if (file1.length() != file2.length()) { + return false; + } else { + Path path1 = file1.toPath(); + Path path2 = file2.toPath(); + return Files.isSameFile(path1, path2) || Files.mismatch(path1, path2) == -1L; + } + } + + public static String toString(InputStream in) throws IOException { + return new String(in.readAllBytes(), StandardCharsets.UTF_8); + } + + /** + * Indicates whether Spark is currently running unit tests. + */ + public static boolean isTesting() { + return System.getenv("SPARK_TESTING") != null || System.getProperty("spark.testing") != null; + } + + /** + * The `os.name` system property. + */ + public static String osName = System.getProperty("os.name"); + + /** + * The `os.version` system property. + */ + public static String osVersion = System.getProperty("os.version"); + + /** + * The `java.version` system property. + */ + public static String javaVersion = Runtime.version().toString(); + + /** + * The `os.arch` system property. + */ + public static String osArch = System.getProperty("os.arch"); + + /** + * Whether the underlying operating system is Windows. + */ + public static boolean isWindows = osName.regionMatches(true, 0, "Windows", 0, 7); + + /** + * Whether the underlying operating system is Mac OS X. + */ + public static boolean isMac = osName.regionMatches(true, 0, "Mac OS X", 0, 8); + + /** + * Whether the underlying operating system is Mac OS X and processor is Apple Silicon. + */ + public static boolean isMacOnAppleSilicon = isMac && osArch.equals("aarch64"); + + /** + * Whether the underlying operating system is Linux. + */ + public static boolean isLinux = osName.regionMatches(true, 0, "Linux", 0, 5); + + /** + * Whether the underlying operating system is UNIX. + */ + public static boolean isUnix = Stream.of("AIX", "HP-UX", "Irix", "Linux", "Mac OS X", "Solaris", + "SunOS", "FreeBSD", "OpenBSD", "NetBSD") + .anyMatch(prefix -> osName.regionMatches(true, 0, prefix, 0, prefix.length())); + + /** + * Throws IllegalArgumentException with the given message if the check is false. + * Keep this clone of CommandBuilderUtils.checkArgument synced with the original. + */ + public static void checkArgument(boolean check, String msg, Object... args) { + if (!check) { + throw new IllegalArgumentException(String.format(msg, args)); + } + } + + /** + * Throws IllegalStateException with the given message if the check is false. 
+ * Keep this clone of CommandBuilderUtils.checkState synced with the original. + */ + public static void checkState(boolean check, String msg, Object... args) { + if (!check) { + throw new IllegalStateException(String.format(msg, args)); + } + } } diff --git a/common/utils/src/main/java/org/apache/spark/unsafe/array/ByteArrayUtils.java b/common/utils-java/src/main/java/org/apache/spark/unsafe/array/ByteArrayUtils.java similarity index 100% rename from common/utils/src/main/java/org/apache/spark/unsafe/array/ByteArrayUtils.java rename to common/utils-java/src/main/java/org/apache/spark/unsafe/array/ByteArrayUtils.java diff --git a/common/utils-java/src/main/java/org/apache/spark/util/Pair.java b/common/utils-java/src/main/java/org/apache/spark/util/Pair.java new file mode 100644 index 0000000000000..bdcc01b49dcf4 --- /dev/null +++ b/common/utils-java/src/main/java/org/apache/spark/util/Pair.java @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.util; + +/** + * An immutable pair of values. Note that the fields are intentionally designed to be `getLeft` and + * `getRight` instead of `left` and `right` in order to mitigate the migration burden + * from `org.apache.commons.lang3.tuple.Pair`. 
+ */ +public record Pair(L getLeft, R getRight) { + public static Pair of(L left, R right) { + return new Pair<>(left, right); + } +} diff --git a/common/utils/src/main/resources/org/apache/spark/SparkLayout.json b/common/utils-java/src/main/resources/org/apache/spark/SparkLayout.json similarity index 100% rename from common/utils/src/main/resources/org/apache/spark/SparkLayout.json rename to common/utils-java/src/main/resources/org/apache/spark/SparkLayout.json diff --git a/common/utils/src/main/resources/org/apache/spark/log4j2-defaults.properties b/common/utils-java/src/main/resources/org/apache/spark/log4j2-defaults.properties similarity index 100% rename from common/utils/src/main/resources/org/apache/spark/log4j2-defaults.properties rename to common/utils-java/src/main/resources/org/apache/spark/log4j2-defaults.properties diff --git a/common/utils/src/main/resources/org/apache/spark/log4j2-json-layout.properties b/common/utils-java/src/main/resources/org/apache/spark/log4j2-json-layout.properties similarity index 100% rename from common/utils/src/main/resources/org/apache/spark/log4j2-json-layout.properties rename to common/utils-java/src/main/resources/org/apache/spark/log4j2-json-layout.properties diff --git a/connect-examples/server-library-example/common/src/main/protobuf/base.proto b/common/utils-java/src/test/java/org/apache/spark/util/CustomLogKeys.java similarity index 75% rename from connect-examples/server-library-example/common/src/main/protobuf/base.proto rename to common/utils-java/src/test/java/org/apache/spark/util/CustomLogKeys.java index 9d902a587ed37..cadacba7c5175 100644 --- a/connect-examples/server-library-example/common/src/main/protobuf/base.proto +++ b/common/utils-java/src/test/java/org/apache/spark/util/CustomLogKeys.java @@ -15,14 +15,10 @@ * limitations under the License. */ -syntax = 'proto3'; +package org.apache.spark.util; -option java_multiple_files = true; -option java_package = "org.apache.connect.examples.serverlibrary.proto"; +import org.apache.spark.internal.LogKey; -message CustomTable { - // Path to the custom table. - string path = 1; - // Name of the custom table. 
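A minimal sketch of how this Pair record is meant to be used as a drop-in for org.apache.commons.lang3.tuple.Pair; the local names are hypothetical, and the generic parameters <L, R> are assumed from the diamond in the record's factory method:

import org.apache.spark.util.Pair;

class PairUsageExample {
  static void demo() {
    Pair<String, Integer> executorCores = Pair.of("executor-1", 4);
    // The accessors keep the Commons Lang names, so migrated call sites
    // only need to change the import.
    String executorId = executorCores.getLeft();
    int cores = executorCores.getRight();
    System.out.println(executorId + " uses " + cores + " cores");
  }
}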
- string name = 2; +public enum CustomLogKeys implements LogKey { + CUSTOM_LOG_KEY } diff --git a/common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java b/common/utils-java/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java similarity index 90% rename from common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java rename to common/utils-java/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java index 6bfe595def1d4..7f8f3f93a8d46 100644 --- a/common/utils/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java +++ b/common/utils-java/src/test/java/org/apache/spark/util/PatternSparkLoggerSuite.java @@ -90,12 +90,7 @@ String expectedPatternForMsgWithMDCValueIsNull(Level level) { } @Override - String expectedPatternForScalaCustomLogKey(Level level) { - return toRegexPattern(level, ".* : Scala custom log message.\n"); - } - - @Override - String expectedPatternForJavaCustomLogKey(Level level) { - return toRegexPattern(level, ".* : Java custom log message.\n"); + String expectedPatternForCustomLogKey(Level level) { + return toRegexPattern(level, ".* : Custom log message.\n"); } } diff --git a/common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java b/common/utils-java/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java similarity index 83% rename from common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java rename to common/utils-java/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java index 186088ede1d0b..d86fe12c89243 100644 --- a/common/utils/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java +++ b/common/utils-java/src/test/java/org/apache/spark/util/SparkLoggerSuiteBase.java @@ -22,11 +22,9 @@ import java.nio.file.Files; import java.util.List; -import org.apache.commons.lang3.tuple.Pair; import org.apache.logging.log4j.Level; import org.junit.jupiter.api.Test; -import org.apache.spark.internal.LogKey; import org.apache.spark.internal.LogKeys; import org.apache.spark.internal.MDC; import org.apache.spark.internal.SparkLogger; @@ -73,23 +71,20 @@ private void checkLogOutput(Level level, Runnable func, ExpectedResult result) { private final String basicMsgWithEscapeChar = "This is a log message\nThis is a new line \t other msg"; - private final MDC executorIDMDC = MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, "1"); + private final MDC executorIDMDC = MDC.of(LogKeys.EXECUTOR_ID, "1"); private final String msgWithMDC = "Lost executor {}."; private final MDC[] mdcs = new MDC[] { - MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, "1"), - MDC.of(LogKeys.REASON$.MODULE$, "the shuffle data is too large")}; + MDC.of(LogKeys.EXECUTOR_ID, "1"), + MDC.of(LogKeys.REASON, "the shuffle data is too large")}; private final String msgWithMDCs = "Lost executor {}, reason: {}"; private final MDC[] emptyMDCs = new MDC[0]; - private final MDC executorIDMDCValueIsNull = MDC.of(LogKeys.EXECUTOR_ID$.MODULE$, null); + private final MDC executorIDMDCValueIsNull = MDC.of(LogKeys.EXECUTOR_ID, null); - private final MDC scalaCustomLogMDC = - MDC.of(CustomLogKeys.CUSTOM_LOG_KEY$.MODULE$, "Scala custom log message."); - - private final MDC javaCustomLogMDC = - MDC.of(JavaCustomLogKeys.CUSTOM_LOG_KEY, "Java custom log message."); + private final MDC customLogMDC = + MDC.of(CustomLogKeys.CUSTOM_LOG_KEY, "Custom log message."); // test for basic message (without any mdc) abstract String expectedPatternForBasicMsg(Level level); @@ -118,10 +113,7 @@ String expectedPatternForMsgWithEmptyMDCsAndException(Level 
level) { abstract String expectedPatternForMsgWithMDCValueIsNull(Level level); // test for scala custom LogKey - abstract String expectedPatternForScalaCustomLogKey(Level level); - - // test for java custom LogKey - abstract String expectedPatternForJavaCustomLogKey(Level level); + abstract String expectedPatternForCustomLogKey(Level level); @Test public void testBasicMsg() { @@ -241,34 +233,14 @@ public void testLoggerWithMDCValueIsNull() { } @Test - public void testLoggerWithScalaCustomLogKey() { - Runnable errorFn = () -> logger().error("{}", scalaCustomLogMDC); - Runnable warnFn = () -> logger().warn("{}", scalaCustomLogMDC); - Runnable infoFn = () -> logger().info("{}", scalaCustomLogMDC); - List.of( - Pair.of(Level.ERROR, errorFn), - Pair.of(Level.WARN, warnFn), - Pair.of(Level.INFO, infoFn)).forEach(pair -> - checkLogOutput(pair.getLeft(), pair.getRight(), this::expectedPatternForScalaCustomLogKey)); - } - - @Test - public void testLoggerWithJavaCustomLogKey() { - Runnable errorFn = () -> logger().error("{}", javaCustomLogMDC); - Runnable warnFn = () -> logger().warn("{}", javaCustomLogMDC); - Runnable infoFn = () -> logger().info("{}", javaCustomLogMDC); + public void testLoggerWithCustomLogKey() { + Runnable errorFn = () -> logger().error("{}", customLogMDC); + Runnable warnFn = () -> logger().warn("{}", customLogMDC); + Runnable infoFn = () -> logger().info("{}", customLogMDC); List.of( Pair.of(Level.ERROR, errorFn), Pair.of(Level.WARN, warnFn), Pair.of(Level.INFO, infoFn)).forEach(pair -> - checkLogOutput(pair.getLeft(), pair.getRight(), this::expectedPatternForJavaCustomLogKey)); + checkLogOutput(pair.getLeft(), pair.getRight(), this::expectedPatternForCustomLogKey)); } } - -class JavaCustomLogKeys { - // Custom `LogKey` must be `implements LogKey` - public static class CUSTOM_LOG_KEY implements LogKey { } - - // Singleton - public static final CUSTOM_LOG_KEY CUSTOM_LOG_KEY = new CUSTOM_LOG_KEY(); -} diff --git a/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java b/common/utils-java/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java similarity index 88% rename from common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java rename to common/utils-java/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java index 1fab167adfeb0..88ac8ea34710a 100644 --- a/common/utils/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java +++ b/common/utils-java/src/test/java/org/apache/spark/util/StructuredSparkLoggerSuite.java @@ -24,7 +24,6 @@ import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.BeforeAll; -import org.apache.spark.internal.Logging$; import org.apache.spark.internal.SparkLogger; import org.apache.spark.internal.SparkLoggerFactory; @@ -33,13 +32,13 @@ public class StructuredSparkLoggerSuite extends SparkLoggerSuiteBase { // Enable Structured Logging before running the tests @BeforeAll public static void setup() { - Logging$.MODULE$.enableStructuredLogging(); + SparkLoggerFactory.enableStructuredLogging(); } // Disable Structured Logging after running the tests @AfterAll public static void teardown() { - Logging$.MODULE$.disableStructuredLogging(); + SparkLoggerFactory.disableStructuredLogging(); } private static final SparkLogger LOGGER = @@ -176,28 +175,14 @@ String expectedPatternForMsgWithMDCValueIsNull(Level level) { } @Override - String expectedPatternForScalaCustomLogKey(Level level) { + String expectedPatternForCustomLogKey(Level level) { return 
compactAndToRegexPattern(level, """ { "ts": "", "level": "", - "msg": "Scala custom log message.", + "msg": "Custom log message.", "context": { - "custom_log_key": "Scala custom log message." - }, - "logger": "" - }"""); - } - - @Override - String expectedPatternForJavaCustomLogKey(Level level) { - return compactAndToRegexPattern(level, """ - { - "ts": "", - "level": "", - "msg": "Java custom log message.", - "context": { - "custom_log_key": "Java custom log message." + "custom_log_key": "Custom log message." }, "logger": "" }"""); diff --git a/common/utils-java/src/test/resources/log4j2.properties b/common/utils-java/src/test/resources/log4j2.properties new file mode 100644 index 0000000000000..cb38f5b55a0ba --- /dev/null +++ b/common/utils-java/src/test/resources/log4j2.properties @@ -0,0 +1,60 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +rootLogger.level = info +rootLogger.appenderRef.file.ref = ${sys:test.appender:-File} + +appender.file.type = File +appender.file.name = File +appender.file.fileName = target/unit-tests.log +appender.file.layout.type = JsonTemplateLayout +appender.file.layout.eventTemplateUri = classpath:org/apache/spark/SparkLayout.json + +# Structured Logging Appender +appender.structured.type = File +appender.structured.name = structured +appender.structured.fileName = target/structured.log +appender.structured.layout.type = JsonTemplateLayout +appender.structured.layout.eventTemplateUri = classpath:org/apache/spark/SparkLayout.json + +# Pattern Logging Appender +appender.pattern.type = File +appender.pattern.name = pattern +appender.pattern.fileName = target/pattern.log +appender.pattern.layout.type = PatternLayout +appender.pattern.layout.pattern = %d{yy/MM/dd HH:mm:ss} %p %c{1}: %m%n%ex + +# Custom loggers +logger.structured_logging.name = org.apache.spark.util.StructuredLoggingSuite +logger.structured_logging.level = trace +logger.structured_logging.appenderRefs = structured +logger.structured_logging.appenderRef.structured.ref = structured + +logger.pattern_logging.name = org.apache.spark.util.PatternLoggingSuite +logger.pattern_logging.level = trace +logger.pattern_logging.appenderRefs = pattern +logger.pattern_logging.appenderRef.pattern.ref = pattern + +logger.structured_logger.name = org.apache.spark.util.StructuredSparkLoggerSuite +logger.structured_logger.level = trace +logger.structured_logger.appenderRefs = structured +logger.structured_logger.appenderRef.structured.ref = structured + +logger.pattern_logger.name = org.apache.spark.util.PatternSparkLoggerSuite +logger.pattern_logger.level = trace +logger.pattern_logger.appenderRefs = pattern +logger.pattern_logger.appenderRef.pattern.ref = pattern diff --git a/common/utils/pom.xml b/common/utils/pom.xml index 44771938439ae..df3bc5adb10bd 100644 --- 
a/common/utils/pom.xml +++ b/common/utils/pom.xml @@ -39,6 +39,18 @@ org.apache.spark spark-tags_${scala.binary.version} + + org.apache.spark + spark-common-utils-java_${scala.binary.version} + ${project.version} + + + org.apache.spark + spark-common-utils-java_${scala.binary.version} + ${project.version} + test-jar + test + org.apache.xbean xbean-asm9-shaded @@ -51,14 +63,6 @@ com.fasterxml.jackson.module jackson-module-scala_${scala.binary.version} - - org.apache.commons - commons-text - - - commons-io - commons-io - org.apache.ivy ivy diff --git a/common/utils/src/main/java/org/apache/spark/SparkThrowable.java b/common/utils/src/main/java/org/apache/spark/SparkThrowable.java index 39808f58b08ae..26d66ae3433ad 100644 --- a/common/utils/src/main/java/org/apache/spark/SparkThrowable.java +++ b/common/utils/src/main/java/org/apache/spark/SparkThrowable.java @@ -60,9 +60,37 @@ default boolean isInternalError() { return SparkThrowableHelper.isInternalError(this.getCondition()); } + // If null, the error message is not for a breaking change + default BreakingChangeInfo getBreakingChangeInfo() { + return SparkThrowableHelper.getBreakingChangeInfo( + this.getCondition()).getOrElse(() -> null); + } + default Map getMessageParameters() { return new HashMap<>(); } + /** + * Returns the default message template for this error. + * + * The template is a machine-readable string with placeholders + * to be filled by {@code getMessageParameters()}. + * + * This is the default template known to Spark, but clients are + * free to generate their own messages (e.g., translations, + * alternate formats) using the provided error metadata. + * + * @return the default message template for this error, or null if unavailable + */ + default String getDefaultMessageTemplate() { + try { + String cond = this.getCondition(); + if (cond == null) return null; + return SparkThrowableHelper.getMessageTemplate(cond); + } catch (Throwable t) { + return null; // Unknown error condition + } + } + default QueryContext[] getQueryContext() { return new QueryContext[0]; } } diff --git a/common/utils/src/main/resources/error/error-conditions.json b/common/utils/src/main/resources/error/error-conditions.json index fe14f4e827938..c3f2c49a446bd 100644 --- a/common/utils/src/main/resources/error/error-conditions.json +++ b/common/utils/src/main/resources/error/error-conditions.json @@ -90,6 +90,42 @@ ], "sqlState" : "42000" }, + "APPROX_TOP_K_MAX_ITEMS_TRACKED_EXCEEDS_LIMIT" : { + "message" : [ + "The max items tracked `maxItemsTracked`() of `approx_top_k` should be less than or equal to ." + ], + "sqlState" : "22023" + }, + "APPROX_TOP_K_MAX_ITEMS_TRACKED_LESS_THAN_K" : { + "message" : [ + "The max items tracked `maxItemsTracked`() of `approx_top_k` should be greater than or equal to `k`()." + ], + "sqlState" : "22023" + }, + "APPROX_TOP_K_NON_POSITIVE_ARG" : { + "message" : [ + "The value of in `approx_top_k` must be a positive integer, but got ." + ], + "sqlState" : "22023" + }, + "APPROX_TOP_K_NULL_ARG" : { + "message" : [ + "The value of in `approx_top_k` cannot be NULL." + ], + "sqlState" : "22004" + }, + "APPROX_TOP_K_SKETCH_SIZE_NOT_MATCH" : { + "message" : [ + "Combining approx_top_k sketches of different sizes is not allowed. Found sketches of size and ." + ], + "sqlState" : "42846" + }, + "APPROX_TOP_K_SKETCH_TYPE_NOT_MATCH" : { + "message" : [ + "Combining approx_top_k sketches of different types is not allowed. Found sketches of type and ." + ], + "sqlState" : "42846" + }, "ARITHMETIC_OVERFLOW" : { "message" : [ ". 
If necessary set to \"false\" to bypass this error." @@ -249,6 +285,24 @@ ], "sqlState" : "0A000" }, + "CANNOT_LOAD_CHECKPOINT_FILE_MANAGER" : { + "message" : [ + "Error loading streaming checkpoint file manager for path=." + ], + "subClass" : { + "ERROR_LOADING_CLASS" : { + "message" : [ + "Error instantiating streaming checkpoint file manager for path= with className=. msg=." + ] + }, + "UNCATEGORIZED" : { + "message" : [ + "" + ] + } + }, + "sqlState" : "58030" + }, "CANNOT_LOAD_FUNCTION_CLASS" : { "message" : [ "Cannot load class when registering the function , please make sure it is on the classpath." @@ -326,6 +380,11 @@ "The change log writer version cannot be ." ] }, + "INVALID_CHECKPOINT_LINEAGE" : { + "message" : [ + "Invalid checkpoint lineage: . " + ] + }, "KEY_ROW_FORMAT_VALIDATION_FAILURE" : { "message" : [ "" @@ -393,6 +452,12 @@ ], "sqlState" : "46110" }, + "CANNOT_MODIFY_STATIC_CONFIG" : { + "message" : [ + "Cannot modify the value of the static Spark config: ." + ], + "sqlState" : "46110" + }, "CANNOT_PARSE_DECIMAL" : { "message" : [ "Cannot parse decimal. Please ensure that the input is a valid number with optional decimal point or comma separators." @@ -518,6 +583,12 @@ ], "sqlState" : "22KD3" }, + "CANNOT_USE_MULTI_ALIASES_IN_WATERMARK_CLAUSE" : { + "message" : [ + "Multiple aliases are not supported in watermark clause." + ], + "sqlState" : "42000" + }, "CANNOT_WRITE_STATE_STORE" : { "message" : [ "Error writing state store files for provider ." @@ -853,6 +924,11 @@ "Please fit or load a model smaller than bytes." ] }, + "MODEL_SUMMARY_LOST" : { + "message" : [ + "The model summary is lost because the cached model is offloaded." + ] + }, "UNSUPPORTED_EXCEPTION" : { "message" : [ "" @@ -921,12 +997,24 @@ }, "sqlState" : "21S01" }, + "CYCLIC_FUNCTION_REFERENCE" : { + "message" : [ + "Cyclic function reference detected: ." + ], + "sqlState" : "42887" + }, "DATAFLOW_GRAPH_NOT_FOUND" : { "message" : [ "Dataflow graph with id could not be found" ], "sqlState" : "KD011" }, + "DATATYPE_CANNOT_ORDER" : { + "message" : [ + "Type does not support ordered operations." + ], + "sqlState" : "0A000" + }, "DATATYPE_MISMATCH" : { "message" : [ "Cannot resolve due to data type mismatch:" @@ -1435,6 +1523,12 @@ ], "sqlState" : "42711" }, + "DUPLICATE_VARIABLE_NAME_INSIDE_DECLARE" : { + "message" : [ + "Found duplicate variable in the declare variable list. Please, remove one of them." + ], + "sqlState" : "42734" + }, "EMITTING_ROWS_OLDER_THAN_WATERMARK_NOT_ALLOWED" : { "message" : [ "Previous node emitted a row with eventTime= which is older than current_watermark_value=", @@ -1781,12 +1875,6 @@ ], "sqlState" : "39000" }, - "FOUND_MULTIPLE_DATA_SOURCES" : { - "message" : [ - "Detected multiple data sources with the name ''. Please check the data source isn't simultaneously registered and located in the classpath." - ], - "sqlState" : "42710" - }, "GENERATED_COLUMN_WITH_DEFAULT_VALUE" : { "message" : [ "A column cannot have both a default value and a generation expression but column has default value: () and generation expression: ()." @@ -2443,6 +2531,29 @@ ], "sqlState" : "22P03" }, + "INVALID_CLONE_SESSION_REQUEST" : { + "message" : [ + "Invalid session clone request." + ], + "subClass" : { + "TARGET_SESSION_ID_ALREADY_CLOSED" : { + "message" : [ + "Cannot clone session to target session ID because a session with this ID was previously closed." 
+ ] + }, + "TARGET_SESSION_ID_ALREADY_EXISTS" : { + "message" : [ + "Cannot clone session to target session ID because a session with this ID already exists." + ] + }, + "TARGET_SESSION_ID_FORMAT" : { + "message" : [ + "Target session ID for clone operation must be an UUID string of the format '00112233-4455-6677-8899-aabbccddeeff'." + ] + } + }, + "sqlState" : "42K04" + }, "INVALID_COLUMN_NAME_AS_PATH" : { "message" : [ "The datasource cannot save the column because its name contains some characters that are not allowed in file paths. Please, use an alias to rename it." @@ -2678,6 +2789,12 @@ ], "sqlState" : "42001" }, + "INVALID_EXPR_TYPE_FOR_QUERY_EXECUTE_IMMEDIATE" : { + "message" : [ + "Expression type must be string type but got ." + ], + "sqlState" : "42K09" + }, "INVALID_EXTERNAL_TYPE" : { "message" : [ "The external type is not valid for the type at the expression ." @@ -2708,6 +2825,34 @@ ], "sqlState" : "42000" }, + "INVALID_FLOW_QUERY_TYPE" : { + "message" : [ + "Flow returns an invalid relation type." + ], + "subClass" : { + "BATCH_RELATION_FOR_STREAMING_TABLE" : { + "message" : [ + "Streaming tables may only be defined by streaming relations, but the flow attempts to write a batch relation to the streaming table . Consider using the STREAM operator in Spark-SQL to convert the batch relation into a streaming relation, or populating the streaming table with an append once-flow instead." + ] + }, + "STREAMING_RELATION_FOR_MATERIALIZED_VIEW" : { + "message" : [ + "Materialized views may only be defined by a batch relation, but the flow attempts to write a streaming relation to the materialized view ." + ] + }, + "STREAMING_RELATION_FOR_ONCE_FLOW" : { + "message" : [ + " is an append once-flow that is defined by a streaming relation. Append once-flows may only be defined by or return a batch relation." + ] + }, + "STREAMING_RELATION_FOR_PERSISTED_VIEW" : { + "message" : [ + "Persisted views may only be defined by a batch relation, but the flow attempts to write a streaming relation to the persisted view ." + ] + } + }, + "sqlState" : "42000" + }, "INVALID_FORMAT" : { "message" : [ "The format is invalid: ." @@ -3349,6 +3494,11 @@ "expects a string literal, but got ." ] }, + "TIME_UNIT" : { + "message" : [ + "expects one of the units 'HOUR', 'MINUTE', 'SECOND', 'MILLISECOND', 'MICROSECOND', but got ''." + ] + }, "ZERO_INDEX" : { "message" : [ "expects %1$, %2$ and so on, but got %0$." @@ -3407,7 +3557,7 @@ }, "INVALID_RECURSIVE_CTE" : { "message" : [ - "Invalid recursive definition found. Recursive queries must contain an UNION or an UNION ALL statement with 2 children. The first child needs to be the anchor term without any recursive references." + "Invalid recursive definition found. Recursive queries must contain an UNION or an UNION ALL statement with 2 children. The first child needs to be the anchor term without any recursive references. Any top level inner CTE must not contain self references." ], "sqlState" : "42836" }, @@ -3416,14 +3566,9 @@ "Invalid recursive reference found inside WITH RECURSIVE clause." ], "subClass" : { - "NUMBER" : { - "message" : [ - "Multiple self-references to one recursive CTE are not allowed." - ] - }, "PLACE" : { "message" : [ - "Recursive references cannot be used on the right side of left outer/semi/anti joins, on the left side of right outer joins, in full outer joins, in aggregates, and in subquery expressions." 
+ "Recursive references cannot be used on the right side of left outer/semi/anti joins, on the left side of right outer joins, in full outer joins, in aggregates, window functions or sorts" ] } }, @@ -3551,6 +3696,12 @@ ], "sqlState" : "42K08" }, + "INVALID_SQL_FUNCTION_DATA_ACCESS" : { + "message" : [ + "Cannot create a SQL function with CONTAINS SQL that accesses a table/view or a SQL function that reads SQL data. Please use READS SQL DATA instead." + ], + "sqlState" : "42K0E" + }, "INVALID_SQL_FUNCTION_PLAN_STRUCTURE" : { "message" : [ "Invalid SQL function plan structure", @@ -3821,12 +3972,6 @@ }, "sqlState" : "42K0M" }, - "INVALID_VARIABLE_TYPE_FOR_QUERY_EXECUTE_IMMEDIATE" : { - "message" : [ - "Variable type must be string type but got ." - ], - "sqlState" : "42K09" - }, "INVALID_VARIANT_CAST" : { "message" : [ "The variant value `` cannot be cast into ``. Please use `try_variant_get` instead." @@ -3941,7 +4086,7 @@ }, "JDBC_EXTERNAL_ENGINE_SYNTAX_ERROR" : { "message" : [ - "JDBC external engine syntax error. The error was caused by the query ." + "JDBC external engine syntax error. The error was caused by the query . ." ], "subClass" : { "DURING_OUTPUT_SCHEMA_RESOLUTION" : { @@ -3975,18 +4120,25 @@ ], "sqlState" : "42K0L" }, - "LABEL_ALREADY_EXISTS" : { + "LABEL_OR_FOR_VARIABLE_ALREADY_EXISTS" : { "message" : [ - "The label