diff --git a/.github/workflows/deploy-docs.yml b/.github/workflows/deploy-docs.yml
new file mode 100644
index 00000000000..925244b409b
--- /dev/null
+++ b/.github/workflows/deploy-docs.yml
@@ -0,0 +1,87 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# For most projects, this workflow file will not need changing; you simply need
+# to commit it to your repository.
+
+
+# Builds the Hugo site in doc/ and force-pushes the generated site to the
+# asf-site branch.
+name: Deploy Hugo site to Pages
+
+on:
+  # Runs on pushes targeting the default branch
+  push:
+    branches:
+      - main
+
+  # Allows you to run this workflow manually from the Actions tab
+  workflow_dispatch:
+
+# Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
+# However, do NOT cancel in-progress runs as we want to allow these production deployments to complete.
+concurrency:
+  group: "pages"
+  cancel-in-progress: false
+
+jobs:
+  # Build job
+  build:
+    runs-on: ubuntu-latest
+    # The final step pushes to the asf-site branch, so the job's GITHUB_TOKEN
+    # needs write access to the repository contents.
+    permissions:
+      contents: write
+    env:
+      # Quoted so the version is always treated as a string.
+      HUGO_VERSION: "0.132.1"
+    steps:
+      - name: Install Hugo CLI
+        run: |
+          wget -q -O ${{ runner.temp }}/hugo.deb https://github.com/gohugoio/hugo/releases/download/v${HUGO_VERSION}/hugo_extended_${HUGO_VERSION}_linux-amd64.deb \
+          && sudo dpkg -i ${{ runner.temp }}/hugo.deb
+      - name: Install Dart Sass
+        run: sudo snap install dart-sass
+      - name: Checkout
+        uses: actions/checkout@v4
+        with:
+          # Fetch the complete history instead of a shallow clone.
+          fetch-depth: 0
+      - name: Install Node.js dependencies
+        working-directory: doc/
+        run: npm ci
+      - name: Build with Hugo
+        working-directory: doc/
+        env:
+          HUGO_CACHEDIR: ${{ runner.temp }}/hugo_cache
+          HUGO_ENVIRONMENT: production
+          TZ: America/Los_Angeles
+        run: |
+          hugo \
+            --gc \
+            --minify \
+            --baseURL "https://avro.apache.org/"
+      - name: Commit new site
+        run: |
+          set -ex
+          mv doc/public docs/
+          git config --global user.email "dev@avro.apache.org"
+          git config --global user.name "Github Actions"
+          # Commit the whole working tree as a single commit without history.
+          git checkout --orphan asf-site-staging
+          git add .
+          git commit -m "Update docs"
+          # Overwrite the remote asf-site branch with that commit; pushing an
+          # explicit refspec avoids having to check out asf-site locally.
+          git push --force origin asf-site-staging:refs/heads/asf-site
diff --git a/.gitmodules b/.gitmodules
deleted file mode 100644
index d96e7ce1437..00000000000
--- a/.gitmodules
+++ /dev/null
@@ -1,3 +0,0 @@
-[submodule "doc/themes/docsy"]
- path = doc/themes/docsy
- url = https://github.com/google/docsy
diff --git a/doc/.gitignore b/doc/.gitignore
index b56c8f8a701..48b779e950c 100644
--- a/doc/.gitignore
+++ b/doc/.gitignore
@@ -1,5 +1,4 @@
 public/
 resources/
 node_modules/
-package-lock.json
 .hugo_build.lock
diff --git a/doc/Dockerfile b/doc/Dockerfile
deleted file mode 100644
index 1a671067c65..00000000000
--- a/doc/Dockerfile
+++ /dev/null
@@ -1,22 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. 
See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -FROM klakegg/hugo:ext-alpine - -RUN apk add git diff --git a/doc/README.md b/doc/README.md index 31f167d8712..f75ba8a3332 100644 --- a/doc/README.md +++ b/doc/README.md @@ -3,16 +3,13 @@ This website is base on [Hugo](https://gohugo.io) and uses the [Docsy](https://www.docsy.dev/) theme. Before building the website, you need to initialize submodules. -``` -git submodule update --init --recursive +```sh +hugo mod get -u ``` ## Previewing the website locally -``` -# From the doc directory, you will need to do this at least once for our SCSS modifications -(cd doc && npm install) - +```sh # Serve the website dynamically using extended hugo: hugo server --buildDrafts --buildFuture --bind 0.0.0.0 --navigateToChanged @@ -31,7 +28,7 @@ stop with **Build build/staging-web/ manually now. 
Press a key to continue...** At this point, from another terminal and in the Avro root directory, you can build the website: -``` +```sh # Install the necessary npm packages docker run --entrypoint=sh --rm -v $(pwd):/src -p 1313:1313 jakejarvis/hugo-extended:latest \ -c "cd build/staging-web && npm install" @@ -42,20 +39,33 @@ docker run --rm -v $(pwd):/src -p 1313:1313 jakejarvis/hugo-extended:latest \ sudo chown -R $USER:$USER build/staging-web ``` -## Avro version - -(TODO) +## New release When a new version of Apache Avro is released: 1. Change the value of `params.avroversion` in `config.toml` 2. Add a new entry to the `Releases` pages in the `Blog` section, for example: +```sh +cp content/en/blog/releases/avro-1.12.0-released.md content/en/blog/releases/avro-1.13.0-released.md ``` -cp content/en/blog/releases/avro-1.10.2-released.md content/en/blog/releases/avro-1.11.0-released.md -``` -## Updating the https://avro.apache.org website from a distribution +### Upload the docs + +Extract the release source tarball and copy its Markdown documentation into `doc/content/en/docs/1.12.0`: -(TODO) +```sh +tar xvfz avro-src-1.12.0.tar.gz +``` +Copy everything except the `api/` directory into this repository. Hugo renders the Markdown, whereas the API docs are already HTML and are served from the ASF SVN repository, so the `api/` directory must be uploaded to SVN instead: + +```sh +svn co https://svn.apache.org/repos/asf/avro/site +cd site/publish/docs/ +mkdir 1.12.0 +cd 1.12.0 +mkdir api +cp -r ~/Desktop/avro-release-dist/avro-1.12.0/avro-doc-1.12.0/api/ api/ +svn commit -m "Avro 1.12.0 release" +``` diff --git a/doc/config.toml b/doc/config.toml index 4b58b37f02a..2fcf46d71d6 100644 --- a/doc/config.toml +++ b/doc/config.toml @@ -29,9 +29,6 @@ enableMissingTranslationPlaceholders = true enableRobotsTXT = true -# Hugo allows theme composition (and inheritance). The precedence is from left to right. -theme = ["docsy"] - # Will give values to .Lastmod etc. 
enableGitInfo = true @@ -135,7 +132,7 @@ archived_version = false # The version number for the version of the docs represented in this doc set. # Used in the "version-banner" partial to display a version number for the # current doc set. -version = "++version++" +version = "1.12.0" # A link to latest version of the docs. Used in the "version-banner" partial to # point people to the main doc site. @@ -262,8 +259,12 @@ url = "http://www.apache.org/security/" desc = "Discuss development issues around the project" [[params.versions]] - version = "++version++ (Current)" - url = "/docs/++version++/" +version = "1.12.0" +url = "https://avro.apache.org/docs/1.12.0/" + +[[params.versions]] +version = "1.11.3" +url = "https://avro.apache.org/docs/1.11.3/" [[params.versions]] version = "1.11.2" @@ -416,4 +417,10 @@ url = "https://avro.apache.org/docs/1.11.0/" [[params.versions]] version = "1.0.0" url = "https://avro.apache.org/docs/1.0.0/" - + +[module] + [module.hugoVersion] + extended = true + min = "0.110.0" + [[module.imports]] + path = "github.com/google/docsy" diff --git a/doc/content/en/_index.html b/doc/content/en/_index.html index 618a99a14bf..57c806025ba 100644 --- a/doc/content/en/_index.html +++ b/doc/content/en/_index.html @@ -69,4 +69,4 @@

Apache Avro™ - a data serialization sy Learn from or connect with other users in our open and welcoming community. We'd love to hear from you! {{% /blocks/feature %}} -{{< /blocks/section >}} \ No newline at end of file +{{< /blocks/section >}} diff --git a/doc/content/en/_index.md b/doc/content/en/_index.md new file mode 100644 index 00000000000..ae6cc051fd9 --- /dev/null +++ b/doc/content/en/_index.md @@ -0,0 +1,40 @@ +--- +title: Apache Avro +--- + +{{< blocks/cover title="Apache Avro™ " image_anchor="top" >}} + +Learn More + + +Download + +

a data serialization system

+{{< blocks/link-down color="info" >}} +{{< /blocks/cover >}} + + +{{% blocks/lead color="primary" %}} + +Apache Avro™ is the leading serialization format for record data, and first choice for streaming data pipelines. It offers excellent schema evolution, and has implementations for the JVM (Java, Kotlin, Scala, …), Python, C/C++/C#, PHP, Ruby, Rust, JavaScript, and even Perl. + +{{% /blocks/lead %}} + + +{{< blocks/section color="dark" type="row">}} + + +{{% blocks/feature icon="fab fa-java" title="Getting started with Java" url="/docs/++version++/getting-started-java" %}} +For Java / JVM users, find out everything you need to know about specifying a schema, (de)serializing Avro data and code generation. +{{% /blocks/feature %}} + +{{% blocks/feature icon="fab fa-python" title="Getting started with Python" url="/docs/++version++/getting-started-python" %}} +For Python users, find out everything you need to know about specifying a schema and (de)serializing Avro data. +{{% /blocks/feature %}} + +{{% blocks/feature icon="fad fa-comments" title="Join Our Community!" url="/community/" %}} +Learn from or connect with other users in our open and welcoming community. We'd love to hear from you! +{{% /blocks/feature %}} + +{{< /blocks/section >}} + diff --git a/doc/content/en/blog/releases/avro-1.12.0-released.md b/doc/content/en/blog/releases/avro-1.12.0-released.md new file mode 100755 index 00000000000..d4703858ebd --- /dev/null +++ b/doc/content/en/blog/releases/avro-1.12.0-released.md @@ -0,0 +1,538 @@ +--- +title: "Avro 1.12.0" +linkTitle: "Avro 1.12.0" +date: 2024-08-05 +--- + + + +The Apache Avro community is pleased to announce the release of Avro 1.12.0! 
+ +All signed release artifacts, signatures and verification instructions can be found <a href="{{< relref "/project/download" >}}">here</a> + +## Changes + +### Sub-task + +- [AVRO-3122]: TestAvroKeyOutputFormat and other avro-mapred tests fail with Java 17 +- [AVRO-3308]: Include a curated list of resources +- [AVRO-3384]: Define C# Coding Style Guidelines +- [AVRO-3449]: Add an onboarding guide for contributors, committers and PMC +- [AVRO-3458]: Add test coverage for GenericRecord +- [AVRO-3488]: Fix Spelling Mistakes +- [AVRO-3490]: Fix IDE0016 Use throw expression +- [AVRO-3491]: Fix IDE0020 Use pattern matching to avoid 'is' check followed by a cast +- [AVRO-3497]: Fix IDE0075 Simplify conditional expression +- [AVRO-3499]: Fix IDE0079 Remove unnecessary suppression +- [AVRO-3538]: Improve the contributions page +- [AVRO-3700]: Publish Java SBOM artifacts with CycloneDX +- [AVRO-3813]: Use list of primitiv +- [AVRO-3826]: Commons test for C++ module +- [AVRO-3916]: Add nanos support for the Rust SDK +- [AVRO-3926]: [Rust] Allow UUID to serialize to Fixed[16] + +### Bug fixes + +- [AVRO-265]: Protocol namespace always written out in toJson +- [AVRO-1318]: Python schema should store fingerprints +- [AVRO-1463]: Undefined values cause warnings when unions with null serialized +- [AVRO-1517]: Unicode strings are accepted as bytes and fixed type by perl API +- [AVRO-1521]: Inconsistent behavior of Perl API with 'boolean' type +- [AVRO-1523]: Perl API: int/long type minimum value checks are off by one +- [AVRO-1737]: Unhashable type: 'RecordSchema' +- [AVRO-1830]: Avro-Perl DataFileReader chokes when avro.codec is absent +- [AVRO-2254]: Unions with 2 records declared downward fail +- [AVRO-2284]: Incorrect EnumSymbol initialization in TestReadingWritingDataInEvolvedSchemas.java +- [AVRO-2498]: UUID generation is not working avro 1.9 version +- [AVRO-2598]: C++ standard of library implies C++ standard of projects using Avro +- [AVRO-2722]: impl/DataFile.cc use of boost::mt19937 for DataFileWriteBase::makeSync 
is not thread safe +- [AVRO-2771]: Java 1.9.X doesn't allow having Error in a Record +- [AVRO-2862]: C# Primitive Schema losing metadata +- [AVRO-2883]: Avrogen (csharp) namespace mapping missing for references +- [AVRO-2885]: Providing a decimal number in an int field doesn't return an error +- [AVRO-2943]: Map comparison between Utf8 and String keys fails +- [AVRO-2987]: pkg-config has a broken `Requires:` section +- [AVRO-3003]: c# apache avro codegen - default value for enum types are not setting up properly +- [AVRO-3133]: EnumAdjust.resolve should compare unqualified name rather than full name +- [AVRO-3216]: Rust: failure reading multiple use of named schemas in file +- [AVRO-3232]: Rust deserializer: add missing matches to deserialize_any union and string/map +- [AVRO-3234]: Rust: Add new codec: zstandard +- [AVRO-3240]: Schema deserialization is not backwards compatible +- [AVRO-3259]: When opening an avro file which is encoded with anything besides none and deflate, it defaults to none and then returns garbage. 
+- [AVRO-3273]: [Java] avro-maven-plugin breaks on old versions of Maven +- [AVRO-3316]: [Rust] build breaks in docker build +- [AVRO-3322]: JavaScript: Buffer is not defined in browser environment +- [AVRO-3331]: Rust: Cannot extract Decimal value +- [AVRO-3350]: Validate that Default value is found in Enum +- [AVRO-3386]: [PHP] Build failing on github and travis +- [AVRO-3410]: [Rust] lint failure +- [AVRO-3433]: Rust: The canonical form should preserve schema references +- [AVRO-3448]: Rust: Encoding Panic with valid schema and input +- [AVRO-3452]: [rust] Derive Deserialize produces invalid Name struct +- [AVRO-3460]: [rust] Value::validate does not validate against Schema Refs +- [AVRO-3461]: [rust] Resolution Flow does not handle schema Refs +- [AVRO-3466]: Rust: serialize Schema to JSON loses inner namespace names +- [AVRO-3468]: Default values for logical types not supported +- [AVRO-3471]: Microseconds logical types are rounded to milliseconds +- [AVRO-3481]: Input and output variable type mismatch +- [AVRO-3482]: DataFileReader should reuse MAGIC data read from inputstream +- [AVRO-3486]: Protocol namespace not parsed correctly if protocol is defined by full name +- [AVRO-3495]: Rust: Record serialization is sensitive to order of fields in struct +- [AVRO-3511]: Rust: Fix the parsing of decimal logical type +- [AVRO-3516]: [rust] Avro Derive not working outside of repo context +- [AVRO-3529]: [Rust][branch-1.11] Cargo.toml is a virtual manifest, requires actual package +- [AVRO-3534]: Rust: Use dependency-review-action only for pull_request events +- [AVRO-3536]: Union type not inheriting type conversions +- [AVRO-3549]: [rust] Avro reader fails if it tries to read data compressed with codec that is not enabled in features +- [AVRO-3560]: avro ignores input after end of avsc json +- [AVRO-3568]: C# ToParsingForm normalizes logical type to "logical" rather than base type +- [AVRO-3581]: Usage of deprecated configuration properties in Velocity +- 
[AVRO-3585]: Unable to encode Value::String as Schema::UUID +- [AVRO-3587]: C: Fix possible heap-buffer-overflow in avro::DataFileReaderBase::readDataBlock() +- [AVRO-3595]: Release Notes missing for 1.11.1 +- [AVRO-3597]: Recent changes in GenericDatumReader.java break compatibility +- [AVRO-3601]: C++ API header contains breaking include +- [AVRO-3612]: Report specific location of incompatibility in record schema +- [AVRO-3613]: Unions cannot have more than one logical type in C# +- [AVRO-3617]: [C++] Integer overflow risks with Validator::count_ and Validator::counters_ +- [AVRO-3618]: [Java] TestBinaryDecoder should check consistency with directBinaryDecoder +- [AVRO-3619]: [Java] TestBinaryDecoder should check consistency with directBinaryDecoder +- [AVRO-3622]: Python compatibility check fails if record with and without namespace are compared +- [AVRO-3625]: [Rust] UnionSchema.is_nullable() should return true if any of the variants is Schema::Null +- [AVRO-3631]: Fix serialization of structs containing Fixed fields +- [AVRO-3632]: Union defaults are not handled as per the specification +- [AVRO-3642]: GenericSingleObjectReader::read_value fails on non-exhaustive read +- [AVRO-3645]: Fix deserialization of enum with unit () type +- [AVRO-3650]: Fix C++ Build on Manjaro +- [AVRO-3656]: Vulnerabilities from dependencies - jackson-databind & commons-text +- [AVRO-3657]: Computation of initial buffer size in OutputBuffer makes no sense +- [AVRO-3659]: Typo in python example +- [AVRO-3662]: [Ruby] Ruby 2.6 CI workflow fails since a while +- [AVRO-3663]: rust crate apache_avro_derive creates invalid schemas for raw identifiers +- [AVRO-3667]: [Python] Python 3.10 CI test fails since a while +- [AVRO-3669]: Missing py.typed file +- [AVRO-3674]: Value::Record containing enums fail to validate when using namespaces in Schema +- [AVRO-3683]: Rust Writer, Reader can't use Schemas with dependencies in other Schemas. i.e. 
The output of Schema::parse_list +- [AVRO-3687]: Rust enum missing default +- [AVRO-3688]: Schema resolution panics when a custom record field is included multiple times +- [AVRO-3698]: [Java] SpecificData.getClassName must replace reserved words +- [AVRO-3706]: AVDL nested imports cannot be resolved if path contains spaces +- [AVRO-3712]: C++ Build Failure on Manjaro +- [AVRO-3724]: C# JsonEncoder can't handle nested array of records +- [AVRO-3737]: [C] memcheck_test_avro_commons_schema is failing +- [AVRO-3738]: [Build][C#] The release build fails with .NET 7.0 target +- [AVRO-3747]: Make serde `is_human_readable` configurable +- [AVRO-3748]: issue with DataFileSeekableInput.SeekableInputStream.skip +- [AVRO-3749]: incorrect conflicting field when field name starts with symbols +- [AVRO-3751]: FastReaderBuilder in multithread lead to infinite loop also blocking other threads +- [AVRO-3755]: [Rust] Deserialization fails for reader schema with namespace +- [AVRO-3756]: Support writing types back to the user in memory without writing files to disk +- [AVRO-3767]: [Rust] Fix ref resolving in Union +- [AVRO-3772]: [Rust] Deserialize Errors for an Unknown Enum Symbol instead of Returning Default +- [AVRO-3773]: [Ruby] Decimal logical type fail to validate default +- [AVRO-3775]: [Ruby] decimal default is not converted to BigDecimal +- [AVRO-3780]: [Rust] Bug: decimal logical type usage through Fixed schema +- [AVRO-3782]: [Rust] Incorrect decimal resolving +- [AVRO-3785]: [Rust] Deserialization if reader schema has a namespace and a union with null and a record containing a reference type +- [AVRO-3786]: [Rust] Deserialization results in FindUnionVariant error if the writer and reader have the same symbol but at different positions +- [AVRO-3787]: [Rust] Deserialization fails to use default if an enum in a record in a union is given an unknown symbol +- [AVRO-3800]: profile section should be declared in the root package. 
+- [AVRO-3809]: Faulty validation of a type reference with implicit nested namespace +- [AVRO-3814]: [Rust] Schema resolution fails when extending a nested record with a union type +- [AVRO-3818]: Enclosing namespace should be inherited to the inner named types if they have no their own namespaces +- [AVRO-3820]: Don't allow invalid field names +- [AVRO-3821]: Rust: Record (de?)serialization is sensitive to order of fields in struct +- [AVRO-3823]: Show helpful error messages +- [AVRO-3824]: The instruction for building the website should be more precise +- [AVRO-3827]: Disallow duplicate field names +- [AVRO-3830]: Handle namespace properly if a name starts with dot +- [AVRO-3837]: Disallow invalid namespaces for the Rust binding +- [AVRO-3846]: Race condition can happen among serde tests +- [AVRO-3847]: Record field doesn't accept default value if field type is union and the type of default value is pre-defined name +- [AVRO-3849]: [Rust] "make readme" doesn't work +- [AVRO-3855]: [rust] lint/clippy fails in ubertool +- [AVRO-3858]: [Build] Add some config to ./build.sh sign +- [AVRO-3859]: [Build][C#] build.sh clean fails to remove some C# files +- [AVRO-3861]: [Build] Add RAT exclusions for python docs +- [AVRO-3865]: [Build][perl] Files are leftover after a build +- [AVRO-3866]: [Build][Python] Files are leftover after a build +- [AVRO-3876]: JacksonUtils is not symmetric +- [AVRO-3881]: Writer ignores user metadata when the body is empty +- [AVRO-3888]: CVE with common compress +- [AVRO-3889]: Maven Plugin Always Recompiles IDL Files +- [AVRO-3894]: [Rust] Record field aliases are not taken into account when serializing +- [AVRO-3897]: Disallow invalid namespace in fully qualified name for Rust SDK +- [AVRO-3898]: [rust] compatibility fails with different namespaces +- [AVRO-3899]: [Rust] Invalid logical types should be ignored and treated as the underlying type +- [AVRO-3912]: Issue with deserialization for BigDecimal in rust +- [AVRO-3925]: [Rust]Decimal 
type serialization/deserialization is incorrect. +- [AVRO-3928]: Avro Rust cannot parse default int logical-type date in a valid schema +- [AVRO-3932]: [C]: fix variable reference in CMakeLists.txt +- [AVRO-3940]: Failed to generate Java classes from multiple .avsc files containing same type +- [AVRO-3953]: C# CodeGen.cs:503 incorrectly throws for "reserved keywords" +- [AVRO-3955]: [Rust] unable to decode string enum from avro encoded data +- [AVRO-3956]: NPE when calling Protocol#equals or hashCode +- [AVRO-3957]: Fix typos in docs and examples +- [AVRO-3964]: [Rust] Out-of-bounds panic +- [AVRO-3970]: [Rust] incorrect compatibility checks with logicalType uuid +- [AVRO-3974]: [Rust] incorrect compatibility checks with ref fields +- [AVRO-3990]: [C++] avrogencpp generates invalid code for union with a reserved word +- [AVRO-4004]: [Rust] Canonical form transformation does not strip the logicalType +- [AVRO-4006]: [Java] DataFileReader does not correctly identify last sync marker when reading/skipping blocks +- [AVRO-4011]: Schema generated via AvroSchema is not compatible with itself +- [AVRO-4014]: [Rust] Sporadic value-schema mismatch with fixed struct + +### New Features + +- [AVRO-3223]: Support optional codecs in C# library +- [AVRO-3358]: Update documentation in DataFileReader +- [AVRO-3388]: Implement extra codecs for C# as seperate nuget packages +- [AVRO-3506]: [rust] Implement Single Object Writer +- [AVRO-3507]: [rust] Implement Single Object Reader +- [AVRO-3591]: Improve interoperability tests with a common test suite +- [AVRO-3592]: [C#] New packages are not included in the build distribution +- [AVRO-3666]: New schema parser for all supported schema formats +- [AVRO-3677]: Introduce Named Schema Formatters +- [AVRO-3678]: [Rust] Support write float value to field defined as double +- [AVRO-3725]: fix documentation of functions and variables +- [AVRO-3764]: [Rust] Add schemata-based resolve method +- [AVRO-3872]: [Build][C#] Warning on nuget upload 
about README +- [AVRO-3922]: Add timestamp-nanos support to Ruby + +### Improvements + +- [AVRO-312]: Generate documentation for Python with Sphinx +- [AVRO-530]: allow for mutual recursion in type definitions +- [AVRO-1496]: Avro aliases support for C++ +- [AVRO-1514]: Clean up perl API dependencies +- [AVRO-1938]: Python support for generating canonical forms of schema +- [AVRO-2307]: Opt-in setting to improve GC behavior during deserialization? +- [AVRO-2397]: Implement Alias Support for C++ +- [AVRO-2717]: Fix undefined behaviour in ZigZag encoding if Avro was compiled with the C++ standard less than C++20. +- [AVRO-3001]: JsonEncode Decode support for C# +- [AVRO-3043]: Remove redundant generic casts +- [AVRO-3078]: C#: Logical type 'local-timestamp-millis' +- [AVRO-3084]: Fix JavaScript interop test to read files generated by other languages on CI +- [AVRO-3120]: Support Next Java LTS (Java 17) +- [AVRO-3214]: Rust: Support "doc" for FixedSchema +- [AVRO-3245]: Rust: Replace crc crate with crc32fast +- [AVRO-3246]: Rust: Add new codec: bzip2 +- [AVRO-3248]: Rust: Support named types in UnionSchema +- [AVRO-3255]: [Ruby] specify rubygems_mfa_required in gemspec metadata +- [AVRO-3264]: Improve the Avro landing page +- [AVRO-3274]: Request for C# API to implement a JSON Encoder +- [AVRO-3284]: Rust: Upgrade to digest 0.10 +- [AVRO-3285]: Upgrade JavaCC and plugin +- [AVRO-3292]: Bump Microsoft.NET.Test.Sdk from 16.11.0 to 17.0.0 in /lang/csharp +- [AVRO-3302]: Rust: Implement interop tests for the Rust module +- [AVRO-3303]: Rust: Add support for Xz codec +- [AVRO-3306]: Java: Build failure with JDK 18+ +- [AVRO-3312]: Rust: Use u32 instead of i32 for the Enum/Union's index field +- [AVRO-3314]: ArgumentOutOfRangeException thrown in AvroDecimal IConvertable.ToType +- [AVRO-3315]: Rust: Add support to back/cycle reference an alias +- [AVRO-3317]: JavaScript: Update dependencies +- [AVRO-3318]: Java: Bump slf4j.version from 1.7.32 to 1.7.33 in /lang/java +- 
[AVRO-3319]: Rust: Update zstd requirement from 0.9.0+zstd.1.5.0 to 0.10.0+zstd.1.5.0 in /lang/rust +- [AVRO-3320]: C#: Bump NUnit3TestAdapter from 4.2.0 to 4.2.1 in /lang/csharp +- [AVRO-3321]: Java: Bump commons-cli from 1.4 to 1.5.0 in /lang/java +- [AVRO-3323]: Remove suppression of CS1591 from AvroDecimal +- [AVRO-3324]: Add omitted braces in AvroDecimal +- [AVRO-3325]: Remove suppression of CA2225 in AvroDecimal +- [AVRO-3326]: Styling - Elements should not be on a single line in AvroDecimal +- [AVRO-3327]: Use Pattern Matching to avoid is check followed by cast +- [AVRO-3328]: Documentation update for CodeGen class +- [AVRO-3329]: Add omitted braces in CodeGen class +- [AVRO-3330]: Avrogen avsc compiler should return 0 exit code if help requested +- [AVRO-3333]: Spacing styling issues in CodeGen class +- [AVRO-3334]: Simplify getNullableType in CodeGen +- [AVRO-3335]: Throw exception for null parameter in GenerateNames +- [AVRO-3336]: Deprecate obsolete namespace lookup in CodeGen +- [AVRO-3337]: C#: Bump Log4net to a newer version +- [AVRO-3340]: Enable standard code analysis and Intellisense +- [AVRO-3341]: Update documentation of CodeGenException +- [AVRO-3342]: Update documentation in CodeGenUtil +- [AVRO-3343]: Update codec to styling standards +- [AVRO-3344]: C#: Remove DataBlock class +- [AVRO-3345]: Resolve unnecessary suppression of CA1052 in DataFileConstants +- [AVRO-3346]: Update documentation to meet standards in DataFileReader +- [AVRO-3347]: Update AddNamespace in CodeGen to meet styling guidelines +- [AVRO-3348]: Update ProcessSchemas to meet styling guidelines +- [AVRO-3349]: Update ProcessProtocols to meet styling guidelines +- [AVRO-3352]: Use required minimum package version fo Newtonsoft only +- [AVRO-3353]: Simplify naming in CodeGen +- [AVRO-3354]: Simplify If statements in CodeGen +- [AVRO-3355]: Fix order of Access Modifier in Codec +- [AVRO-3356]: Simplify naming in DataFileReader +- [AVRO-3357]: Properties only assigned in 
constructors should be marked readonly +- [AVRO-3359]: Updated formatting in DeflateCodec +- [AVRO-3360]: Update Header XML Documentation +- [AVRO-3361]: Simplify if statement in NullCodec +- [AVRO-3366]: Fix naming in GenericEnum +- [AVRO-3367]: Remove unnecessary suppression of CA1307 from GenericEnum +- [AVRO-3377]: Deserialization of record of mangled Java class throws ClassCastException +- [AVRO-3404]: Extend the IDL syntax to serve as a .avsc equivalent as well +- [AVRO-3405]: add API for user-provided metadata when writing to Object Container File +- [AVRO-3407]: Test for user metadata in the interop tests +- [AVRO-3415]: Add C# code coverage support +- [AVRO-3416]: Benchmarking project for C# +- [AVRO-3418]: [Rust] Fix clippy errors for Rust 1.59.0 +- [AVRO-3421]: Add tests for ArraySchema +- [AVRO-3424]: C# Add support to parse string into Schema.Type +- [AVRO-3427]: Add command line option to skip creation of directories based on namespace path +- [AVRO-3434]: .NET/#C: Support LogicalSchema for ReflectReader/Writer +- [AVRO-3435]: Add --version to avrogen +- [AVRO-3450]: Document IDL support in IDEs +- [AVRO-3451]: fix poor Avro write performance +- [AVRO-3453]: C# Avrogen Add Generated Code Attribute +- [AVRO-3464]: Rust: Print user frientlier output for the 'benchmark' example +- [AVRO-3465]: Add avrogen protocol tests +- [AVRO-3467]: Use oracle-actions to test with Early Access JDKs +- [AVRO-3469]: Build and test using .NET SDK 7.0 in guthub action +- [AVRO-3474]: Increase read performance by moving CanRead to constructor +- [AVRO-3475]: Enforce time-millis and time-micros specification +- [AVRO-3477]: Add unit tests for logical types with fixed base type +- [AVRO-3479]: [rust] Derive Avro Schema macro +- [AVRO-3483]: [Rust] Log error messages with a reason when the validation fails +- [AVRO-3484]: Rust: Implement derive default via annotation +- [AVRO-3485]: Rust: Implement derive doc via annotation +- [AVRO-3487]: Java: Bump Jackson to 2.12.6.1 +- 
[AVRO-3489]: JavaScript: Replace istanbul with nyc for code coverage +- [AVRO-3492]: Rust: Implement derive aliases via annotation +- [AVRO-3496]: Rust: Use visitor.visit_borrowed_str() when possible +- [AVRO-3498]: Deprecate NameCtorKey +- [AVRO-3500]: Rust: Use property based testing for avro_derive IT tests +- [AVRO-3501]: Rust: Enable Github Actions caching for the Rust CI +- [AVRO-3502]: Rust: Wrong [ORDER] for Parsing Canonical Form +- [AVRO-3510]: PHP build fails on Travis +- [AVRO-3517]: Rust: Optimize crates' size by disabling default features of the dependencies +- [AVRO-3518]: Rust: Represent aliases as Name instead of String +- [AVRO-3522]: Rust: Setup better logging and colored stacktraces for the tests +- [AVRO-3526]: Rust: Improve resolving Bytes and Fixed from string +- [AVRO-3527]: Generated equals() and hashCode() for SpecificRecords +- [AVRO-3530]: Rust: Use dependency-review-action for Rust +- [AVRO-3533]: Rust: Update dependencies +- [AVRO-3542]: Scale assignment optimization +- [AVRO-3543]: Support wasm32 compilation target for Rust library +- [AVRO-3547]: support custom attribute at field level +- [AVRO-3554]: Create original art for the Avro logo +- [AVRO-3579]: Java Test : From Junit4 to JUnit5 +- [AVRO-3586]: Make Avro Build Reproducible +- [AVRO-3599]: Rust: Make apache-avro-test-helper releasable +- [AVRO-3600]: [Rust] UnionSchema::new method should be public +- [AVRO-3602]: Support Map(with non-String keys) and Set in ReflectDatumReader +- [AVRO-3608]: Rust: Fix clippy errors in Rust 1.63.0 +- [AVRO-3609]: support custom attributes +- [AVRO-3610]: [C++] Upgrade from C++ 11 to C++ 17 +- [AVRO-3611]: org.apache.avro.util.RandomData generates invalid test data +- [AVRO-3616]: [C++]: Fix compilation warnings +- [AVRO-3621]: [Rust] Improved resolution of nullable record fields +- [AVRO-3623]: Improve the PULL_REQUEST_TEMPLATE +- [AVRO-3624]: Fix Avro website checks on whimsy +- [AVRO-3630]: [Rust] Make it possible to extend pre-existing Avro 
bytes +- [AVRO-3633]: Additional attributes for 'avro_derive' crate +- [AVRO-3634]: Implement AvroSchemaComponent for bool +- [AVRO-3639]: [Rust] Derive implementation for Eq where possible +- [AVRO-3644]: [JAVA] Support java.util.Optional in reflect package +- [AVRO-3649]: [JAVA] reorder union types to match default value +- [AVRO-3658]: Bump jackson to address CVE-2020-36518 +- [AVRO-3660]: SpecificRecord java data generator helper method - should I contribute? +- [AVRO-3679]: [Rust] Enable 'perf' feature of regex dependency +- [AVRO-3692]: Serde flatten is not supported when deserializing +- [AVRO-3693]: avrogencpp Invalid type for union exception does not identify which union +- [AVRO-3704]: Naming rules : multiple choice +- [AVRO-3705]: avrogencpp needs an option to generate code using std instead of boost +- [AVRO-3708]: [Rust] Fix clippy warnings introduced with Rust 1.67.0 +- [AVRO-3709]: [Rust] Add aliases to RecordField +- [AVRO-3711]: Add documentation about uuid in IDL +- [AVRO-3721]: [Java] Add cache to org.apache.avro.JsonProperties.getObjectProps +- [AVRO-3722]: Eagerly Initialize Instance Variables in Ruby Implementation +- [AVRO-3723]: [Rust] Make schema::ResolvedSchema and schema::Names public +- [AVRO-3727]: Add RollForward to C# avrogen tool +- [AVRO-3741]: Note about the version requirement of Rust in BUILD.md +- [AVRO-3742]: Bump maven-plugin-plugin from 3.8.1 to 3.8.2 +- [AVRO-3743]: Bump cyclonedx-maven-plugin from 2.7.6 to 2.7.7 +- [AVRO-3744]: Bump maven-checkstyle-plugin from 3.2.1 to 3.2.2 +- [AVRO-3745]: Bump zstd-jni from 1.5.4-2 to 1.5.5-2 +- [AVRO-3746]: Bump grpc.version from 1.54.0 to 1.54.1 +- [AVRO-3757]: [rust] Update syn to 2.x +- [AVRO-3758]: [Rust] Use AtomicXyz types instead of static mutable ones +- [AVRO-3759]: [Rust] Schema types inconsistency +- [AVRO-3766]: [Rust] Print friendlier errors when test cases fail +- [AVRO-3771]: [Rust] Logging flood during validate method +- [AVRO-3779]: Any big decimal conversion +- 
[AVRO-3784]: [Rust] Make Decimal more usable until its rewritten +- [AVRO-3790]: [RUBY] Missing default namespace information in SchemaParseError +- [AVRO-3794]: [Rust] Do not fail the shared tests when the shared folder is not available +- [AVRO-3799]: Enable the schema parser to read and parse from input streams for Rust binding +- [AVRO-3812]: Handle null namespace properly for canonicalized schema representation +- [AVRO-3815]: Broken indentation in the specification doc +- [AVRO-3828]: [Rust] Use newer Github actions for setting up Rust +- [AVRO-3829]: JUnit4 to JUnit5 : continue +- [AVRO-3833]: Spec: clarify usage names and aliases +- [AVRO-3835]: [Rust] Get rid of byteorder and zerocopy dependencies +- [AVRO-3836]: [Rust] Fix the build with Rust 1.65.0 +- [AVRO-3838]: [Rust] Replace regex crate with regex-lite +- [AVRO-3839]: [Rust] Replace lazy_static crate with std::sync::OnceLock +- [AVRO-3844]: [Rust] Fix clippy errors with Rust 1.72.0 +- [AVRO-3851]: Validate default value for record fields and enums on parsing +- [AVRO-3852]: Support Java 21 +- [AVRO-3853]: Support local-timestamp logical types for the Rust SDK +- [AVRO-3862]: Add aliases and doc methods to Schema in Rust SDK +- [AVRO-3863]: Delete temporary test data after tests finish +- [AVRO-3868]: Check consistency between the doc comment in lib.rs and README.md +- [AVRO-3870]: Speed up CI for Rust +- [AVRO-3871]: Add BlockingDirectBinaryEncoder +- [AVRO-3877]: [doc] fix wrong configuration for avro-maven-plugin in java example +- [AVRO-3878]: Rename default git branch to be 'main' +- [AVRO-3879]: [Build][Python] Fix `./build.sh clean` to remove the generated Python documents +- [AVRO-3880]: Upgrade maven-antrun-plugin to 3.1.0 +- [AVRO-3884]: Add local-timestamp-nanos and timestamp-nanos +- [AVRO-3885]: Update the maillist link +- [AVRO-3886]: [Rust] Serialize attribute in schema to support custom logical type +- [AVRO-3887]: Remove redundant casts +- [AVRO-3891]: Remove redundant cast from 
DirectBinaryDecoder +- [AVRO-3892]: [Rust] support to convert bytes to fixed in resolve_fixed +- [AVRO-3896]: [Rust] support read schema with custom logical type +- [AVRO-3900]: Permissiveness in schema namespaces for rust SDK? +- [AVRO-3901]: [Rust] Better serde union support +- [AVRO-3904]: [rust] Sometimes when calculating schema compatibility the code panics but maybe it should not +- [AVRO-3905]: [Rust] Fix clippy error with Rust 1.74.0 +- [AVRO-3910]: [Rust] Replace `color-backtrace` with `better-panic` for the tests +- [AVRO-3914]: Add nanos support for the Java SDK +- [AVRO-3917]: [Rust] Field aliases are not taken into account when calculating schema compatibility +- [AVRO-3918]: Allow UUID to serialize to Fixed[16] +- [AVRO-3919]: Add UUID type example +- [AVRO-3920]: [Rust] Serialize custom attribute in RecordField +- [AVRO-3923]: Add Avro 1.11.3 release blog +- [AVRO-3927]: [Rust] support custom attributes in list and map +- [AVRO-3935]: Support logical types in Rust Schema Compatibility checks +- [AVRO-3936]: Clean up NOTICE file +- [AVRO-3938]: Schema.Parser.validate should not be null +- [AVRO-3939]: [Rust] Make it possible to use custom schema comparators +- [AVRO-3942]: MemoryOutputStream yields a compiler warning +- [AVRO-3943]: Unused folders +- [AVRO-3948]: [Rust] Re-export bigdecimal::BigDecimal as apache_avro::BigDecimal +- [AVRO-3949]: [Rust]: Add support for serde to apache_avro::Decimal +- [AVRO-3950]: [rust] Some code when checking schema compatibility is never reached +- [AVRO-3958]: Update min CMake version to 3.5 +- [AVRO-3959]: Avoid deprecated OSX atomic ops +- [AVRO-3960]: Fix st ANYARGS warnings +- [AVRO-3961]: Add AVRO_INVALID to avro_type_t +- [AVRO-3962]: [Rust] avro-derive supports extract docs from field comments +- [AVRO-3977]: Fix failing typecheck in Python 3.12 +- [AVRO-3981]: Close SyncableFileOutputStream +- [AVRO-3982]: Use String.isEmpty() instead +- [AVRO-3983]: Allow setting a custom encoder in DataFileWriter +- 
[AVRO-3985]: Restrict trusted packages in ReflectData and SpecificData +- [AVRO-3992]: [C++] Encoding a record with 0 fields in a vector throws +- [AVRO-3994]: [C++] Solidus (/) should not be escaped in JSON output +- [AVRO-3995]: [C++] Update build system to disallow compiling with unsupported language versions +- [AVRO-3998]: Switch Perl library from JSON::XS to JSON::MaybeXS +- [AVRO-3999]: Avoid warnings in Perl test suite +- [AVRO-4007]: [Rust] Faster is_nullable for UnionSchema +- [AVRO-4010]: Avoid resolving schema on every call to read() +- [AVRO-4013]: PHP 8 Deprecations +- [AVRO-4015]: avro-cpp does not work with CMake's FetchContent +- [AVRO-4016]: Remove the use of MD5 in org.apache.avro.file.DataFileWriter#generateSync +- [AVRO-4019]: [C++] Correct signedness of validator methods +- [AVRO-4022]: Revive docker image + +### Testing + +- [AVRO-3277]: Test against Ruby 3.1 +- [AVRO-3278]: Drop support for Ruby 2.6 +- [AVRO-3558]: Rust: Add a demo crate that shows usage as WebAssembly +- [AVRO-3696]: [Python] Replace tox-wheel with upstream tox 4 +- [AVRO-3697]: Test against Ruby 3.2 +- [AVRO-3701]: Add github action to validate maven 4 build compatibility +- [AVRO-3921]: Test against Ruby 3.3 + +### Wishes + +- [AVRO-1757]: Serialize Avro schema objects to avdl file (IDL format) +- [AVRO-2211]: SchemaBuilder equivalent or other means of schema creation +- [AVRO-3197]: Rust: Disable logical type on failure + +### Tasks + +- [AVRO-3205]: Rust: Update Cargo.toml [package] information +- [AVRO-3241]: [Java] Publish SNAPSHOT artifacts +- [AVRO-3242]: Use TravisCI for testing Apache Avro on Linux ARM64 +- [AVRO-3247]: Rust: Run MIRI checks +- [AVRO-3281]: Bump zstd-jni from 1.5.0-4 to 1.5.1-1 in /lang/java +- [AVRO-3282]: Bump grpc.version from 1.42.1 to 1.43.1 in /lang/java +- [AVRO-3283]: Update zerocopy requirement from 0.3.0 to 0.6.1 in /lang/rust +- [AVRO-3304]: avro-tools Update log4j dependency for critical vulnerability +- [AVRO-3309]: Bump 
NUnit.ConsoleRunner from 3.13.2 to 3.14.0 in /lang/csharp +- [AVRO-3310]: Bump build-helper-maven-plugin from 3.2.0 to 3.3.0 in /lang/java +- [AVRO-3311]: Bump grpc.version from 1.43.1 to 1.43.2 in /lang/java +- [AVRO-3332]: Java: Bump grpc.version from 1.43.2 to 1.44.0 in /lang/java +- [AVRO-3339]: Rust: Rename crate from avro-rs to apache-avro +- [AVRO-3351]: C#: Bump System.Reflection.Emit.Lightweight from 4.3.0 to 4.7.0 in /lang/csharp +- [AVRO-3372]: Java: Bump archetype-plugin.version from 3.2.0 to 3.2.1 in /lang/java +- [AVRO-3373]: Java: Bump protobuf-java from 3.19.1 to 3.19.4 in /lang/java +- [AVRO-3391]: Update typed-builder requirement from 0.9.1 to 0.10.0 in /lang/rust +- [AVRO-3409]: [Java] Bump Reload4j to 1.2.19 +- [AVRO-3419]: [Rust] Update strum 0.23.1 and strum_macros to 0.24.0 +- [AVRO-3422]: Bump jetty.version from 9.4.44.v20210927 to 9.4.45.v20220203 in /lang/java +- [AVRO-3428]: Rust: Restructure the RUST SDK to a Rust workspace +- [AVRO-3431]: CI: Cancel in-progress workflows if there are new commits in PR +- [AVRO-3432]: Java: Bump grpc.version from 1.44.0 to 1.44.1 in /lang/java +- [AVRO-3437]: Rust: Update dependencies +- [AVRO-3439]: Java: Bump netty-bom from 4.1.72.Final to 4.1.74.Final in /lang/java +- [AVRO-3455]: Java: Bump netty-bom from 4.1.74.Final to 4.1.75.Final +- [AVRO-3456]: Rust: Update zstd requirement from 0.10.0+zstd.1.5.2 to 0.11.0+zstd.1.5.2 +- [AVRO-3457]: JS: Bump mocha from 9.2.1 to 9.2.2 +- [AVRO-3462]: Java: Bump hadoop-client from 3.3.1 to 3.3.2 +- [AVRO-3463]: Java: Bump grpc.version from 1.44.1 to 1.45.0 +- [AVRO-3494]: Rust: uncomment some tests which pass +- [AVRO-3519]: Rust: Remove MIRI Github Actions check +- [AVRO-3552]: Rust: sort the contents in Cargo.toml files with cargo-tomlfmt +- [AVRO-3574]: Rust: Add Cargo.lock to Git +- [AVRO-3575]: Rust: Add a module for fuzzy testing +- [AVRO-3653]: [build] Move off Travis CI +- [AVRO-3661]: [Rust] Fix new clippy errors introduced with Rust 1.65 +- [AVRO-3672]: 
Add CI testing for Python 3.11 +- [AVRO-3681]: [Python] GitHub actions failing with python 3.6 +- [AVRO-3682]: [Build] Remove forrest from Avro build +- [AVRO-3754]: upgrade to jackson 2.15.0 +- [AVRO-3793]: [Rust] Bump minimum supported version of Rust to 1.65.0 +- [AVRO-3808]: Drop support for Python 3.6, add Pypy 3.8-3.10 +- [AVRO-3875]: [Rust]: Set "readme" metadata for each package separately +- [AVRO-3915]: [Rust] Extract dependencies used by more than one member crates into the workspace +- [AVRO-3937]: [Rust]: Use cargo-deny to check the dependencies' licenses +- [AVRO-3944]: Fix CMake warning +- [AVRO-3945]: Fix issues reported by cppcheck +- [AVRO-3967]: Replace boost::format with fmt +- [AVRO-3978]: Build with Java 11 minimum + +## Language SDK / Convenience artifacts + +* C#: https://www.nuget.org/packages/Apache.Avro/1.12.0 +* Java: https://repo1.maven.org/maven2/org/apache/avro/avro/1.12.0/ +* Javascript: https://www.npmjs.com/package/avro-js/v/1.12.0 +* Perl: https://metacpan.org/release/Avro +* Python 3: https://pypi.org/project/avro/1.12.0 +* Ruby: https://rubygems.org/gems/avro/versions/1.12.0 +* Rust: https://crates.io/crates/apache-avro/0.17.0 + +Thanks to everyone for contributing! diff --git a/doc/content/en/docs/1.12.0/Getting started (Java)/_index.md b/doc/content/en/docs/1.12.0/Getting started (Java)/_index.md new file mode 100644 index 00000000000..429e9837641 --- /dev/null +++ b/doc/content/en/docs/1.12.0/Getting started (Java)/_index.md @@ -0,0 +1,289 @@ +--- +categories: [] +tags: ["java"] +title: "Getting Started (Java)" +linkTitle: "Getting Started (Java)" +weight: 2 +--- + + + +This is a short guide for getting started with Apache Avro™ using Java. This guide only covers using Avro for data serialization; see Patrick Hunt's [Avro RPC Quick Start](https://github.com/phunt/avro-rpc-quickstart) for a good introduction to using Avro for RPC. 
+ +## Download + +Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be downloaded from the [Apache Avro™ Download]({{< relref "/project/download" >}}) page. This guide uses Avro {{< avro_version >}}, the latest version at the time of writing. For the examples in this guide, download avro-{{< avro_version >}}.jar and avro-tools-{{< avro_version >}}.jar. + +Alternatively, if you are using Maven, add the following dependency to your POM: + +```xml +<dependency> +  <groupId>org.apache.avro</groupId> +  <artifactId>avro</artifactId> +  <version>{{< avro_version >}}</version> +</dependency> +``` + +As well as the Avro Maven plugin (for performing code generation): + +```xml +<plugin> +  <groupId>org.apache.avro</groupId> +  <artifactId>avro-maven-plugin</artifactId> +  <version>{{< avro_version >}}</version> +  <configuration> +    <sourceDirectory>${project.basedir}/src/main/avro/</sourceDirectory> +    <outputDirectory>${project.basedir}/src/main/java/</outputDirectory> +  </configuration> +  <executions> +    <execution> +      <phase>generate-sources</phase> +      <goals> +        <goal>schema</goal> +      </goals> +    </execution> +  </executions> +</plugin> +<plugin> +  <groupId>org.apache.maven.plugins</groupId> +  <artifactId>maven-compiler-plugin</artifactId> +  <configuration> +    <source>1.8</source> +    <target>1.8</target> +  </configuration> +</plugin> +``` + +You may also build the required Avro jars from source. Building Avro is beyond the scope of this guide; see the Build Documentation page in the wiki for more information. + +## Defining a schema + +Avro schemas are defined using JSON or IDL (the latter requires an extra dependency). Schemas are composed of primitive types (null, boolean, int, long, float, double, bytes, and string) and complex types (record, enum, array, map, union, and fixed). You can learn more about Avro schemas and types from the specification, but for now let's start with a simple schema example, user.avsc: + +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` + +This schema defines a record representing a hypothetical user. (Note that a schema file can only contain a single schema definition.)
At minimum, a record definition must include its type ("type": "record"), a name ("name": "User"), and fields, in this case name, favorite_number, and favorite_color. We also define a namespace ("namespace": "example.avro"), which together with the name attribute defines the "full name" of the schema (example.avro.User in this case). + +Fields are defined via an array of objects, each of which defines a name and type (other attributes are optional, see the record specification for more details). The type attribute of a field is another schema object, which can be either a primitive or complex type. For example, the name field of our User schema is the primitive type string, whereas the favorite_number and favorite_color fields are both unions, represented by JSON arrays. Unions are a complex type that can be any of the types listed in the array; e.g., favorite_number can either be an int or null, essentially making it an optional field. + +## Serializing and deserializing with code generation + +### Compiling the schema +Code generation allows us to automatically create classes based on our previously-defined schema. Once we have defined the relevant classes, there is no need to use the schema directly in our programs. We use the avro-tools jar to generate code as follows: + +```shell +java -jar /path/to/avro-tools-{{< avro_version >}}.jar compile schema <schema file> <destination> +``` + +This will generate the appropriate source files in a package based on the schema's namespace in the provided destination folder. For instance, to generate a User class in package example.avro from the schema defined above, run + +```shell +java -jar /path/to/avro-tools-{{< avro_version >}}.jar compile schema user.avsc . +``` + +Note that if you are using the Avro Maven plugin, there is no need to manually invoke the schema compiler; the plugin automatically performs code generation on any .avsc files present in the configured source directory.
+### Creating Users +Now that we've completed the code generation, let's create some Users, serialize them to a data file on disk, and then read back the file and deserialize the User objects. + +First let's create some Users and set their fields. + +```java +User user1 = new User(); +user1.setName("Alyssa"); +user1.setFavoriteNumber(256); +// Leave favorite color null + +// Alternate constructor +User user2 = new User("Ben", 7, "red"); + +// Construct via builder +User user3 = User.newBuilder() + .setName("Charlie") + .setFavoriteColor("blue") + .setFavoriteNumber(null) + .build(); +``` + +As shown in this example, Avro objects can be created either by invoking a constructor directly or by using a builder. Unlike constructors, builders will automatically set any default values specified in the schema. Additionally, builders validate the data as it is set, whereas objects constructed directly will not cause an error until the object is serialized. However, using constructors directly generally offers better performance, as builders create a copy of the data structure before it is written. + +Note that we do not set user1's favorite color. Since that field is of type ["string", "null"], we can either set it to a string or leave it null; it is essentially optional. Similarly, we set user3's favorite number to null (using a builder requires setting all fields, even if they are null). + +### Serializing +Now let's serialize our Users to disk. + +```java +// Serialize user1, user2 and user3 to disk +DatumWriter userDatumWriter = new SpecificDatumWriter(User.class); +DataFileWriter dataFileWriter = new DataFileWriter(userDatumWriter); +dataFileWriter.create(user1.getSchema(), new File("users.avro")); +dataFileWriter.append(user1); +dataFileWriter.append(user2); +dataFileWriter.append(user3); +dataFileWriter.close(); +``` + +We create a DatumWriter, which converts Java objects into an in-memory serialized format.
The SpecificDatumWriter class is used with generated classes and extracts the schema from the specified generated type. + +Next we create a DataFileWriter, which writes the serialized records, as well as the schema, to the file specified in the dataFileWriter.create call. We write our users to the file via calls to the dataFileWriter.append method. When we are done writing, we close the data file. + +### Deserializing +Finally, let's deserialize the data file we just created. + +```java +// Deserialize Users from disk +DatumReader userDatumReader = new SpecificDatumReader(User.class); +DataFileReader dataFileReader = new DataFileReader(file, userDatumReader); +User user = null; +while (dataFileReader.hasNext()) { +// Reuse user object by passing it to next(). This saves us from +// allocating and garbage collecting many objects for files with +// many items. +user = dataFileReader.next(user); +System.out.println(user); +} +``` + +This snippet will output: + +```json +{"name": "Alyssa", "favorite_number": 256, "favorite_color": null} +{"name": "Ben", "favorite_number": 7, "favorite_color": "red"} +{"name": "Charlie", "favorite_number": null, "favorite_color": "blue"} +``` + +Deserializing is very similar to serializing. We create a SpecificDatumReader, analogous to the SpecificDatumWriter we used in serialization, which converts in-memory serialized items into instances of our generated class, in this case User. We pass the DatumReader and the previously created File to a DataFileReader, analogous to the DataFileWriter, which reads both the schema used by the writer as well as the data from the file on disk. The data will be read using the writer's schema included in the file and the schema provided by the reader, in this case the User class. The writer's schema is needed to know the order in which fields were written, while the reader's schema is needed to know what fields are expected and how to fill in default values for fields added since the file was written. 
If there are differences between the two schemas, they are resolved according to the Schema Resolution specification. + +Next we use the DataFileReader to iterate through the serialized Users and print the deserialized object to stdout. Note how we perform the iteration: we create a single User object which we store the current deserialized user in, and pass this record object to every call of dataFileReader.next. This is a performance optimization that allows the DataFileReader to reuse the same User object rather than allocating a new User for every iteration, which can be very expensive in terms of object allocation and garbage collection if we deserialize a large data file. While this technique is the standard way to iterate through a data file, it's also possible to use for (User user : dataFileReader) if performance is not a concern. + +### Compiling and running the example code +This example code is included as a Maven project in the examples/java-example directory in the Avro docs. From this directory, execute the following commands to build and run the example: + +```shell +$ mvn compile # includes code generation via Avro Maven plugin +$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain +``` + +### Beta feature: Generating faster code +In release 1.9.0, we introduced a new approach to generating code that speeds up decoding of objects by more than 10% and encoding by more than 30% (future performance enhancements are underway). To ensure a smooth introduction of this change into production systems, this feature is controlled by a feature flag, the system property org.apache.avro.specific.use_custom_coders. In this first release, this feature is off by default. To turn it on, set the system flag to true at runtime. 
In the sample above, for example, you could enable the faster coders as follows: + +$ mvn -q exec:java -Dexec.mainClass=example.SpecificMain \ + -Dorg.apache.avro.specific.use_custom_coders=true + +Note that you do not have to recompile your Avro schema to have access to this feature. The feature is compiled and built into your code, and you turn it on and off at runtime using the feature flag. As a result, you can turn it on during testing, for example, and then off in production. Or you can turn it on in production, and quickly turn it off if something breaks. + +We encourage the Avro community to exercise this new feature early to help build confidence. (For those paying on-demand for compute resources in the cloud, it can lead to meaningful cost savings.) As confidence builds, we will turn this feature on by default, and eventually eliminate the feature flag (and the old code). + +## Serializing and deserializing without code generation +Data in Avro is always stored with its corresponding schema, meaning we can always read a serialized item regardless of whether we know the schema ahead of time. This allows us to perform serialization and deserialization without code generation. + +Let's go over the same example as in the previous section, but without using code generation: we'll create some users, serialize them to a data file on disk, and then read back the file and deserialize the user objects. + +### Creating users +First, we use a SchemaParser to read our schema definition and create a Schema object. + +```java +Schema schema = new SchemaParser().parse(new File("user.avsc")).mainSchema(); +``` + +Using this schema, let's create some users.
+ +```java +GenericRecord user1 = new GenericData.Record(schema); +user1.put("name", "Alyssa"); +user1.put("favorite_number", 256); +// Leave favorite color null + +GenericRecord user2 = new GenericData.Record(schema); +user2.put("name", "Ben"); +user2.put("favorite_number", 7); +user2.put("favorite_color", "red"); +``` + +Since we're not using code generation, we use GenericRecords to represent users. GenericRecord uses the schema to verify that we only specify valid fields. If we try to set a non-existent field (e.g., user1.put("favorite_animal", "cat")), we'll get an AvroRuntimeException when we run the program. + +Note that we do not set user1's favorite color. Since that record is of type ["string", "null"], we can either set it to a string or leave it null; it is essentially optional. + +### Serializing +Now that we've created our user objects, serializing and deserializing them is almost identical to the example above which uses code generation. The main difference is that we use generic instead of specific readers and writers. + +First we'll serialize our users to a data file on disk. + +```java +// Serialize user1 and user2 to disk +File file = new File("users.avro"); +DatumWriter datumWriter = new GenericDatumWriter(schema); +DataFileWriter dataFileWriter = new DataFileWriter(datumWriter); +dataFileWriter.create(schema, file); +dataFileWriter.append(user1); +dataFileWriter.append(user2); +dataFileWriter.close(); +``` + +We create a DatumWriter, which converts Java objects into an in-memory serialized format. Since we are not using code generation, we create a GenericDatumWriter. It requires the schema both to determine how to write the GenericRecords and to verify that all non-nullable fields are present. + +As in the code generation example, we also create a DataFileWriter, which writes the serialized records, as well as the schema, to the file specified in the dataFileWriter.create call. 
We write our users to the file via calls to the dataFileWriter.append method. When we are done writing, we close the data file. + +### Deserializing +Finally, we'll deserialize the data file we just created. + +```java +// Deserialize users from disk +DatumReader datumReader = new GenericDatumReader(schema); +DataFileReader dataFileReader = new DataFileReader(file, datumReader); +GenericRecord user = null; +while (dataFileReader.hasNext()) { +// Reuse user object by passing it to next(). This saves us from +// allocating and garbage collecting many objects for files with +// many items. +user = dataFileReader.next(user); +System.out.println(user); +``` + +This outputs: + +```json +{"name": "Alyssa", "favorite_number": 256, "favorite_color": null} +{"name": "Ben", "favorite_number": 7, "favorite_color": "red"} +``` + +Deserializing is very similar to serializing. We create a GenericDatumReader, analogous to the GenericDatumWriter we used in serialization, which converts in-memory serialized items into GenericRecords. We pass the DatumReader and the previously created File to a DataFileReader, analogous to the DataFileWriter, which reads both the schema used by the writer as well as the data from the file on disk. The data will be read using the writer's schema included in the file, and the reader's schema provided to the GenericDatumReader. The writer's schema is needed to know the order in which fields were written, while the reader's schema is needed to know what fields are expected and how to fill in default values for fields added since the file was written. If there are differences between the two schemas, they are resolved according to the Schema Resolution specification. + +Next, we use the DataFileReader to iterate through the serialized users and print the deserialized object to stdout. 
Note how we perform the iteration: we create a single GenericRecord object which we store the current deserialized user in, and pass this record object to every call of dataFileReader.next. This is a performance optimization that allows the DataFileReader to reuse the same record object rather than allocating a new GenericRecord for every iteration, which can be very expensive in terms of object allocation and garbage collection if we deserialize a large data file. While this technique is the standard way to iterate through a data file, it's also possible to use for (GenericRecord user : dataFileReader) if performance is not a concern. + +### Compiling and running the example code +This example code is included as a Maven project in the examples/java-example directory in the Avro docs. From this directory, execute the following commands to build and run the example: + +```shell +$ mvn compile +$ mvn -q exec:java -Dexec.mainClass=example.GenericMain +``` diff --git a/doc/content/en/docs/1.12.0/Getting started (Python)/_index.md b/doc/content/en/docs/1.12.0/Getting started (Python)/_index.md new file mode 100644 index 00000000000..44e3a8f37fd --- /dev/null +++ b/doc/content/en/docs/1.12.0/Getting started (Python)/_index.md @@ -0,0 +1,147 @@ +--- +categories: [] +tags: ["python"] +title: "Getting Started (Python)" +linkTitle: "Getting Started (Python)" +weight: 3 +--- + + + +This is a short guide for getting started with Apache Avro™ using Python. This guide only covers using Avro for data serialization; see Patrick Hunt's Avro RPC Quick Start for a good introduction to using Avro for RPC. + +## Notice for Python 3 users +A package called "avro-python3" had been provided to support Python 3 previously, but the codebase was consolidated into the "avro" package and that supports both Python 2 and 3 now. The avro-python3 package will be removed in the near future, so users should use the "avro" package instead. 
They are mostly API compatible, but there are a few minor differences (e.g., function name capitalization, such as avro.schema.Parse vs avro.schema.parse). + +## Download +For Python, the easiest way to get started is to install it from PyPI. Python's Avro API is available on PyPI. + +```shell +$ python3 -m pip install avro +``` + +The official releases of the Avro implementations for C, C++, C#, Java, PHP, Python, and Ruby can be downloaded from the Apache Avro™ Releases page. This guide uses Avro {{< avro_version >}}, the latest version at the time of writing. Download and unzip avro-{{< avro_version >}}.tar.gz, and install via python setup.py (this will probably require root privileges). Ensure that you can import avro from a Python prompt. + +```shell +$ tar xvf avro-{{< avro_version >}}.tar.gz +$ cd avro-{{< avro_version >}} +$ python setup.py install +$ python +>>> import avro # should not raise ImportError +``` + +Alternatively, you may build the Avro Python library from source. From the root Avro directory, run the commands: + +```shell +$ cd lang/py/ +$ python3 -m pip install -e . +$ python +``` + +## Defining a schema +Avro schemas are defined using JSON. Schemas are composed of primitive types (null, boolean, int, long, float, double, bytes, and string) and complex types (record, enum, array, map, union, and fixed). You can learn more about Avro schemas and types from the specification, but for now let's start with a simple schema example, user.avsc: + +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` + +This schema defines a record representing a hypothetical user. (Note that a schema file can only contain a single schema definition.)
At minimum, a record definition must include its type ("type": "record"), a name ("name": "User"), and fields, in this case name, favorite_number, and favorite_color. We also define a namespace ("namespace": "example.avro"), which together with the name attribute defines the "full name" of the schema (example.avro.User in this case). + +Fields are defined via an array of objects, each of which defines a name and type (other attributes are optional, see the record specification for more details). The type attribute of a field is another schema object, which can be either a primitive or complex type. For example, the name field of our User schema is the primitive type string, whereas the favorite_number and favorite_color fields are both unions, represented by JSON arrays. unions are a complex type that can be any of the types listed in the array; e.g., favorite_number can either be an int or null, essentially making it an optional field. + +## Serializing and deserializing without code generation +Data in Avro is always stored with its corresponding schema, meaning we can always read a serialized item, regardless of whether we know the schema ahead of time. This allows us to perform serialization and deserialization without code generation. Note that the Avro Python library does not support code generation. 
+ +Try running the following code snippet, which serializes two users to a data file on disk, and then reads back and deserializes the data file: + +```python +import avro.schema +from avro.datafile import DataFileReader, DataFileWriter +from avro.io import DatumReader, DatumWriter + +schema = avro.schema.parse(open("user.avsc", "rb").read()) + +writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) +writer.append({"name": "Alyssa", "favorite_number": 256}) +writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) +writer.close() + +reader = DataFileReader(open("users.avro", "rb"), DatumReader()) +for user in reader: + print(user) +reader.close() +``` + +This outputs: + +```json +{'favorite_color': None, 'favorite_number': 256, 'name': 'Alyssa'} +{'favorite_color': 'red', 'favorite_number': 7, 'name': 'Ben'} +``` + +Do make sure that you open your files in binary mode (i.e. using the modes wb or rb respectively). Otherwise you might generate corrupt files due to automatic replacement of newline characters with the platform-specific representations. + +Let's take a closer look at what's going on here. + +```python +schema = avro.schema.parse(open("user.avsc", "rb").read()) +``` + +avro.schema.parse takes a string containing a JSON schema definition as input and outputs a avro.schema.Schema object (specifically a subclass of Schema, in this case RecordSchema). We're passing in the contents of our user.avsc schema file here. + +```python +writer = DataFileWriter(open("users.avro", "wb"), DatumWriter(), schema) +``` + +We create a DataFileWriter, which we'll use to write serialized items to a data file on disk. The DataFileWriter constructor takes three arguments: + +* The file we'll serialize to +* A DatumWriter, which is responsible for actually serializing the items to Avro's binary format (DatumWriters can be used separately from DataFileWriters, e.g., to perform IPC with Avro). +* The schema we're using. 
The DataFileWriter needs the schema both to write the schema to the data file, and to verify that the items we write are valid items and write the appropriate fields. + +```python +writer.append({"name": "Alyssa", "favorite_number": 256}) +writer.append({"name": "Ben", "favorite_number": 7, "favorite_color": "red"}) +``` + +We use DataFileWriter.append to add items to our data file. Avro records are represented as Python dicts. Since the field favorite_color has type ["string", "null"], we are not required to specify this field, as shown in the first append. Were we to omit the required name field, an exception would be raised. Any extra entries in the dict that do not correspond to a field are ignored. + +```python +reader = DataFileReader(open("users.avro", "rb"), DatumReader()) +``` + +We open the file again, this time for reading back from disk. We use a DataFileReader and DatumReader analogous to the DataFileWriter and DatumWriter above. + +```python +for user in reader: + print(user) +``` + +The DataFileReader is an iterator that returns dicts corresponding to the serialized items. diff --git a/doc/content/en/docs/1.12.0/IDL Language/_index.md b/doc/content/en/docs/1.12.0/IDL Language/_index.md new file mode 100644 index 00000000000..7d0121274a9 --- /dev/null +++ b/doc/content/en/docs/1.12.0/IDL Language/_index.md @@ -0,0 +1,511 @@ +--- +title: "IDL Language" +linkTitle: "IDL Language" +weight: 201 +--- + + + +## Introduction +This document defines Avro IDL, a higher-level language for authoring Avro schemata. Before reading this document, you should have familiarity with the concepts of schemata and protocols, as well as the various primitive and complex types available in Avro. + +## Overview + +### Purpose +The aim of the Avro IDL language is to enable developers to author schemata in a way that feels more similar to common programming languages like Java, C++, or Python.
Additionally, the Avro IDL language may feel more familiar for those users who have previously used the interface description languages (IDLs) in other frameworks like Thrift, Protocol Buffers, or CORBA. + +### Usage +Each Avro IDL file defines either a single Avro Protocol, or an Avro Schema with supporting named schemata in a namespace. When parsed, it thus yields either a Protocol or a Schema. These can be respectively written to JSON-format Avro Protocol files with extension .avpr or JSON-format Avro Schema files with extension .avsc. + +To convert a _.avdl_ file into a _.avpr_ file, it may be processed by the `idl` tool. For example: +```shell +$ java -jar avro-tools.jar idl src/test/idl/input/namespaces.avdl /tmp/namespaces.avpr +$ head /tmp/namespaces.avpr +{ + "protocol" : "TestNamespace", + "namespace" : "avro.test.protocol", +``` +To convert a _.avdl_ file into a _.avsc_ file, it may be processed by the `idl` tool too. For example: +```shell +$ java -jar avro-tools.jar idl src/test/idl/input/schema_syntax_schema.avdl /tmp/schema_syntax.avsc +$ head /tmp/schema_syntax.avsc +{ + "type": "array", + "items": { + "type": "record", + "name": "StatusUpdate", +``` +The `idl` tool can also process input to and from _stdin_ and _stdout_. See `idl --help` for full usage information. + +A Maven plugin is also provided to compile .avdl files. To use it, add something like the following to your pom.xml: +```xml +<build> + <plugins> + <plugin> + <groupId>org.apache.avro</groupId> + <artifactId>avro-maven-plugin</artifactId> + <executions> + <execution> + <goals> + <goal>idl</goal> + </goals> + </execution> + </executions> + </plugin> + </plugins> +</build> +``` + +## Defining a Schema in Avro IDL +An Avro IDL file consists of exactly one (main) schema definition. The minimal schema is defined by the following code: +```java +schema int; +``` +This is equivalent to (and generates) the following JSON schema definition: +```json +{ + "type": "int" +} +``` +More complex schemata can also be defined, for example by adding named schemata like this: +```java +namespace default.namespace.for.named.schemata; +schema Message; + +record Message { + string?
title = null; + string message; +} +``` +This is equivalent to (and generates) the following JSON schema definition: +```json +{ + "type" : "record", + "name" : "Message", + "namespace" : "default.namespace.for.named.schemata", + "fields" : [ { + "name" : "title", + "type" : [ "null", "string" ], + "default": null + }, { + "name" : "message", + "type" : "string" + } ] +} +``` +Schemata in Avro IDL can contain the following items: + +* Imports of external protocol and schema files (only named schemata are imported). +* Definitions of named schemata, including records, errors, enums, and fixeds. + +## Defining a Protocol in Avro IDL +An Avro IDL file consists of exactly one protocol definition. The minimal protocol is defined by the following code: +```java +protocol MyProtocol { +} +``` +This is equivalent to (and generates) the following JSON protocol definition: +```json +{ +"protocol" : "MyProtocol", + "types" : [ ], + "messages" : { + } +} +``` +The namespace of the protocol may be changed using the @namespace annotation: +```java +@namespace("mynamespace") +protocol MyProtocol { +} +``` +This notation is used throughout Avro IDL as a way of specifying properties for the annotated element, as will be described later in this document. + +Protocols in Avro IDL can contain the following items: + +* Imports of external protocol and schema files. +* Definitions of named schemata, including records, errors, enums, and fixeds. +* Definitions of RPC messages + +## Imports +Files may be imported in one of three formats: + +* An IDL file may be imported with a statement like: + + `import idl "foo.avdl";` + +* A JSON protocol file may be imported with a statement like: + + `import protocol "foo.avpr";` + +* A JSON schema file may be imported with a statement like: + + `import schema "foo.avsc";` + +When importing into an IDL schema file, only (named) types are imported into this file. When importing into an IDL protocol, messages are imported into the protocol as well. 
+ +Imported file names are resolved relative to the current IDL file. + +## Defining an Enumeration +Enums are defined in Avro IDL using a syntax similar to C or Java. An Avro Enum supports optional default values. In the case that a reader schema is unable to recognize a symbol written by the writer, the reader will fall back to using the defined default value. This default is only used when an incompatible symbol is read. It is not used if the enum field is missing. + +Example Writer Enum Definition +```java +enum Shapes { + SQUARE, TRIANGLE, CIRCLE, OVAL +} +``` +Example Reader Enum Definition +```java +enum Shapes { + SQUARE, TRIANGLE, CIRCLE +} = CIRCLE; +``` +In the above example, the reader will use the default value of `CIRCLE` whenever reading data written with the `OVAL` symbol of the writer. Also note that, unlike the JSON format, anonymous enums cannot be defined. + +## Defining a Fixed Length Field +Fixed fields are defined using the following syntax: +``` +fixed MD5(16); +``` +This example defines a fixed-length type called MD5, which contains 16 bytes. + +## Defining Records and Errors +Records are defined in Avro IDL using a syntax similar to a struct definition in C: +```java +record Employee { + string name; + boolean active = true; + long salary; +} +``` +The above example defines a record with the name “Employee” with three fields. + +To define an error, simply use the keyword _error_ instead of _record_. For example: +```java +error Kaboom { + string explanation; + int result_code = -1; +} +``` +Each field in a record or error consists of a type and a name, optional property annotations and an optional default value. + +A type reference in Avro IDL must be one of: + +* A primitive type +* A logical type +* A named schema (either defined or imported) +* A complex type (array, map, or union) + +### Primitive Types +The primitive types supported by Avro IDL are the same as those supported by Avro's JSON format. 
This list includes _int_, _long_, _string_, _boolean_, _float_, _double_, _null_, and _bytes_. + +### Logical Types +Some of the logical types supported by Avro's JSON format are directly supported by Avro IDL. The currently supported types are: + +* _decimal_ (logical type [decimal]({{< relref "specification#decimal" >}})) +* _date_ (logical type [date]({{< relref "specification#date" >}})) +* _time_ms_ (logical type [time-millis]({{< relref "specification#time-millisecond-precision" >}})) +* _timestamp_ms_ (logical type [timestamp-millis]({{< relref "specification#timestamp-millisecond-precision" >}})) +* _local_timestamp_ms_ (logical type [local-timestamp-millis]({{< relref "specification#local_timestamp_ms" >}})) +* _uuid_ (logical type [uuid]({{< relref "specification#uuid" >}})) + +For example: +```java +record Job { + string jobid; + date submitDate; + time_ms submitTime; + timestamp_ms finishTime; + decimal(9,2) finishRatio; + uuid pk = "a1a2a3a4-b1b2-c1c2-d1d2-d3d4d5d6d7d8"; +} +``` + +Logical types can also be specified via an annotation, which is useful for logical types for which a keyword does not exist: + +```java +record Job { + string jobid; + @logicalType("timestamp-micros") + long finishTime; +} +``` + +### References to Named Schemata +If a named schema has already been defined in the same Avro IDL file, it may be referenced by name as if it were a primitive type: +```java +record Card { + Suit suit; // refers to the enum Suit defined above + int number; +} +``` + +### Default Values +Default values for fields may be optionally specified by using an equals sign after the field name followed by a JSON expression indicating the default value. This JSON is interpreted as described in the [spec]({{< relref "specification#schema-record" >}}). + +### Complex Types + +#### Arrays +Array types are written in a manner that will seem familiar to C++ or Java programmers. An array of any type t is denoted `array<t>`.
For example, an array of strings is denoted `array<string>`, and a multidimensional array of Foo records would be `array<array<Foo>>`. + +#### Maps +Map types are written similarly to array types. A map that contains values of type t is written `map<t>`. As in the JSON schema format, all maps contain `string`-type keys. + +#### Unions +Union types are denoted as `union { typeA, typeB, typeC, ... }`. For example, this record contains a string field that is optional (unioned with null), and a field containing either a precise or an imprecise number: +```java +record RecordWithUnion { + union { null, string } optionalString; + union { decimal(12, 6), float } number; +} +``` +Note that the same restrictions apply to Avro IDL unions as apply to unions defined in the JSON format; namely, a union may not contain multiple elements of the same type. Also, fields/parameters that use the union type and have a default parameter must specify a default value of the same type as the **first** union type. + +Because it occurs so often, there is a special shorthand to denote a union of `null` with one other schema. The first three fields in the following snippet have identical schemata, as do the last two fields: + +```java +record RecordWithUnion { + union { null, string } optionalString1 = null; + string? optionalString2 = null; + string? optionalString3; // No default value + + union { string, null } optionalString4 = "something"; + string? optionalString5 = "something else"; +} +``` + +Note that unlike explicit unions, the position of the `null` type is fluid; it will be the first or last type depending on the default value (if any). So all fields are valid in the example above. + +## Defining RPC Messages +The syntax to define an RPC message within an Avro IDL protocol is similar to the syntax for a method declaration within a C header file or a Java interface.
To define an RPC message _add_ which takes two arguments named _foo_ and _bar_, returning an _int_, simply include the following definition within the protocol: +```java +int add(int foo, int bar = 0); +``` +Message arguments, like record fields, may specify default values. + +To define a message with no response, you may use the alias _void_, equivalent to the Avro _null_ type: +```java +void logMessage(string message); +``` +If you have defined or imported an error type within the same protocol, you may declare that a message can throw this error using the syntax: +```java +void goKaboom() throws Kaboom; +``` +To define a one-way message, use the keyword `oneway` after the parameter list, for example: +```java +void fireAndForget(string message) oneway; +``` + +## Other Language Features + +### Comments and documentation +All Java-style comments are supported within an Avro IDL file. Any text following _//_ on a line is ignored, as is any text between _/*_ and _*/_, possibly spanning multiple lines. + +Comments that begin with _/**_ are used as the documentation string for the type or field definition that follows the comment. + +### Escaping Identifiers +Occasionally, one may want to distinguish between identifiers and language keywords. In order to do so, backticks (`) may be used to escape +the identifier. For example, to define a message with the literal name error, you may write: +```java +void `error`(); +``` +This syntax is allowed anywhere an identifier is expected. + +### Annotations for Ordering and Namespaces +Java-style annotations may be used to add additional properties to types and fields throughout Avro IDL. These can be custom properties, or +special properties as used in the JSON-format Avro Schema and Protocol files.
+ +For example, to specify the sort order of a field within a record, one may use the `@order` annotation before the field name as follows: +```java +record MyRecord { + string @order("ascending") myAscendingSortField; + string @order("descending") myDescendingField; + string @order("ignore") myIgnoredField; +} +``` +A field's type (with the exception of type references) may also be preceded by annotations, e.g.: +```java +record MyRecord { + @java-class("java.util.ArrayList") array<string> myStrings; +} +``` +This can be used to support Java classes that can be serialized/deserialized via their `toString`/`String constructor`, e.g.: +```java +record MyRecord { + @java-class("java.math.BigDecimal") string value; + @java-key-class("java.io.File") map<string> fileStates; + array<@java-class("java.math.BigDecimal") string> weights; +} +``` +Similarly, a `@namespace` annotation may be used to modify the namespace when defining a named schema. For example: +```java +@namespace("org.apache.avro.firstNamespace") +protocol MyProto { + @namespace("org.apache.avro.someOtherNamespace") + record Foo {} + + record Bar {} +} +``` +will define a protocol in the _firstNamespace_ namespace. The record _Foo_ will be defined in _someOtherNamespace_ and _Bar_ will be defined in _firstNamespace_ as it inherits its default from its container. + +Type and field aliases are specified with the `@aliases` annotation as follows: +```java +@aliases(["org.old.OldRecord", "org.ancient.AncientRecord"]) +record MyRecord { + string @aliases(["oldField", "ancientField"]) myNewField; +} +``` +Some annotations like those listed above are handled specially. All other annotations are added as properties to the protocol, message, schema or field. You can use any identifier or series of identifiers separated by dots and/or dashes as property name.
+ +## Complete Example +The following is an example of two Avro IDL files that together show most of the above features: + +### schema.avdl +```java +/* + * Header with license information. + */ +// Optional default namespace (if absent, the default namespace is the null namespace). +namespace org.apache.avro.test; +// Optional main schema definition; if used, the IDL file is equivalent to a .avsc file. +schema TestRecord; + +/** Documentation for the enum type Kind */ +@aliases(["org.foo.KindOf"]) +enum Kind { + FOO, + BAR, // the bar enum value + BAZ +} = FOO; // For schema evolution purposes, unmatched values do not throw an error, but are resolved to FOO. + +/** MD5 hash; good enough to avoid most collisions, and smaller than (for example) SHA256. */ +fixed MD5(16); + +record TestRecord { + /** Record name; has no intrinsic order */ + string @order("ignore") name; + + Kind @order("descending") kind; + + MD5 hash; + + /* + Note that 'null' is the first union type. Just like .avsc / .avpr files, the default value must be of the first union type. + */ + union { null, MD5 } /** Optional field */ @aliases(["hash"]) nullableHash = null; + // Shorthand syntax; the null in this union is placed based on the default value (or first if there's no default). + MD5? anotherNullableHash = null; + + array<long> arrayOfLongs; +} +``` + +### protocol.avdl +```java +/* + * Header with license information. + */ + +/** + * An example protocol in Avro IDL + */ +@namespace("org.apache.avro.test") +protocol Simple { + // Import the example file above + import idl "schema.avdl"; + + /** Errors are records that can be thrown from a method */ + error TestError { + string message; + } + + string hello(string greeting); + /** Return what was given.
Demonstrates the use of backticks to name types/fields/messages/parameters after keywords */ + TestRecord echo(TestRecord `record`); + int add(int arg1, int arg2); + bytes echoBytes(bytes data); + void `error`() throws TestError; + // The oneway keyword forces the method to return null. + void ping() oneway; +} +``` + +Additional examples may be found in the Avro source tree under the `src/test/idl/input` directory. + +## IDE support + +There are several editors and IDEs that support Avro IDL files, usually via plugins. + +### JetBrains + +Apache Avro IDL Schema Support 203.1.2 was released on 9 December 2021. + +Features: +* Syntax Highlighting +* Code Completion +* Code Formatting +* Error Highlighting +* Inspections & quick fixes +* JSON schemas for .avpr and .avsc files + +It's available via the [JetBrains Marketplace](https://plugins.jetbrains.com/plugin/15728-apache-avro-idl-schema-support) +and on [GitHub](https://github.com/opwvhk/avro-schema-support). + +The plugin supports almost all JetBrains products: IntelliJ IDEA, PyCharm, WebStorm, Android Studio, AppCode, GoLand, Rider, CLion, RubyMine, PhpStorm, DataGrip, DataSpell, MPS, Code With Me Guest and JetBrains Client. + +Only JetBrains Gateway does not support this plugin directly. But the backend (JetBrains) IDE that it connects to does. + +### Eclipse + +Avroclipse 0.0.11 was released on 4 December 2019. + +Features: +* Syntax Highlighting +* Error Highlighting +* Code Completion + +It is available on the [Eclipse Marketplace](https://marketplace.eclipse.org/content/avroclipse) +and [GitHub](https://github.com/dvdkruk/avroclipse). + +### Visual Studio Code + +avro-idl 0.5.0 was released on 16 June 2021. It provides syntax highlighting. + +It is available on the [VisualStudio Marketplace](https://marketplace.visualstudio.com/items?itemName=streetsidesoftware.avro) +and [GitHub](https://github.com/Jason3S/vscode-avro-ext) + +### Atom.io + +atom-language-avro 0.0.13 was released on 14 August 2015.
It provides syntax highlighting. + +It is available as [Atom.io package](https://atom.io/packages/atom-language-avro) +and [GitHub](https://github.com/jonesetc/atom-language-avro) + +### Vim + +A `.avdl` detecting plugin by Gurpreet Atwal on [GitHub](https://github.com/gurpreetatwal/vim-avro) (Last change in December 2016) + +[avro-idl.vim](https://github.com/apache/avro/blob/main/share/editors/avro-idl.vim) in the Avro repository `share/editors` directory (last change in September 2010) + +Both provide syntax highlighting. diff --git a/doc/content/en/docs/1.12.0/MapReduce guide/_index.md b/doc/content/en/docs/1.12.0/MapReduce guide/_index.md new file mode 100644 index 00000000000..f262bc6e2a7 --- /dev/null +++ b/doc/content/en/docs/1.12.0/MapReduce guide/_index.md @@ -0,0 +1,396 @@ +--- +title: "MapReduce guide" +linkTitle: "MapReduce guide" +weight: 200 +--- + + + +Avro provides a convenient way to represent complex data structures within a Hadoop MapReduce job. Avro data can be used as both input to and output from a MapReduce job, as well as the intermediate format. The example in this guide uses Avro data for all three, but it's possible to mix and match; for instance, MapReduce can be used to aggregate a particular field in an Avro record. + +This guide assumes basic familiarity with both Hadoop MapReduce and Avro. See the [Hadoop documentation](https://hadoop.apache.org/docs/current/) and the [Avro getting started guide](./getting-started-java/) for introductions to these projects. This guide uses the old MapReduce API (`org.apache.hadoop.mapred`) and the new MapReduce API (`org.apache.hadoop.mapreduce`). + +## Setup +The code from this guide is included in the Avro docs under examples/mr-example. The example is set up as a Maven project that includes the necessary Avro and MapReduce dependencies and the Avro Maven plugin for code generation, so no external jars are needed to run the example. 
In particular, the POM includes the following dependencies: +```xml +<dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro</artifactId> + <version>{{< avro_version >}}</version> +</dependency> +<dependency> + <groupId>org.apache.avro</groupId> + <artifactId>avro-mapred</artifactId> + <version>{{< avro_version >}}</version> +</dependency> +<dependency> + <groupId>org.apache.hadoop</groupId> + <artifactId>hadoop-client</artifactId> + <version>3.1.2</version> +</dependency> +``` +And the following plugin: +```xml +<plugin> + <groupId>org.apache.avro</groupId> + <artifactId>avro-maven-plugin</artifactId> + <version>{{< avro_version >}}</version> + <executions> + <execution> + <phase>generate-sources</phase> + <goals> + <goal>schema</goal> + </goals> + <configuration> + <sourceDirectory>${project.basedir}/../</sourceDirectory> + <outputDirectory>${project.basedir}/target/generated-sources/</outputDirectory> + </configuration> + </execution> + </executions> +</plugin> +``` + +If you do not configure the *sourceDirectory* and *outputDirectory* properties, the defaults will be used. The *sourceDirectory* property defaults to *src/main/avro*. The *outputDirectory* property defaults to *target/generated-sources*. You can change the paths to match your project layout. + +Alternatively, Avro jars can be downloaded directly from the Apache Avro™ Releases [page](https://avro.apache.org/releases.html). The relevant Avro jars for this guide are *avro-{{< avro_version >}}.jar* and *avro-mapred-{{< avro_version >}}.jar*, as well as *avro-tools-{{< avro_version >}}.jar* for code generation and viewing Avro data files as JSON. In addition, you will need to install Hadoop in order to use MapReduce. + +## Example: ColorCount +Below is a simple example of a MapReduce job that uses Avro. There is an example for both the old (org.apache.hadoop.mapred) and new (org.apache.hadoop.mapreduce) APIs under *examples/mr-example/src/main/java/example/*. _MapredColorCount_ is the example for the older mapred API while _MapReduceColorCount_ is the example for the newer mapreduce API. Both examples are below, but we will detail the mapred API in our subsequent examples.
+ +MapredColorCount.java: +```java +package example; + +import java.io.IOException; + +import org.apache.avro.*; +import org.apache.avro.Schema.Type; +import org.apache.avro.mapred.*; +import org.apache.hadoop.conf.*; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.mapred.*; +import org.apache.hadoop.util.*; + +import example.avro.User; + +public class MapredColorCount extends Configured implements Tool { + + public static class ColorCountMapper extends AvroMapper> { + @Override + public void map(User user, AvroCollector> collector, Reporter reporter) + throws IOException { + CharSequence color = user.getFavoriteColor(); + // We need this check because the User.favorite_color field has type ["string", "null"] + if (color == null) { + color = "none"; + } + collector.collect(new Pair(color, 1)); + } + } + + public static class ColorCountReducer extends AvroReducer> { + @Override + public void reduce(CharSequence key, Iterable values, + AvroCollector> collector, + Reporter reporter) + throws IOException { + int sum = 0; + for (Integer value : values) { + sum += value; + } + collector.collect(new Pair(key, sum)); + } + } + + public int run(String[] args) throws Exception { + if (args.length != 2) { + System.err.println("Usage: MapredColorCount "); + return -1; + } + + JobConf conf = new JobConf(getConf(), MapredColorCount.class); + conf.setJobName("colorcount"); + + FileInputFormat.setInputPaths(conf, new Path(args[0])); + FileOutputFormat.setOutputPath(conf, new Path(args[1])); + + AvroJob.setMapperClass(conf, ColorCountMapper.class); + AvroJob.setReducerClass(conf, ColorCountReducer.class); + + // Note that AvroJob.setInputSchema and AvroJob.setOutputSchema set + // relevant config options such as input/output format, map output + // classes, and output key class. 
+ AvroJob.setInputSchema(conf, User.getClassSchema()); + AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), + Schema.create(Type.INT))); + + JobClient.runJob(conf); + return 0; + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new Configuration(), new MapredColorCount(), args); + System.exit(res); + } +} +``` + +MapReduceColorCount.java: +```java +package example; + +import java.io.IOException; + +import org.apache.avro.Schema; +import org.apache.avro.mapred.AvroKey; +import org.apache.avro.mapred.AvroValue; +import org.apache.avro.mapreduce.AvroJob; +import org.apache.avro.mapreduce.AvroKeyInputFormat; +import org.apache.avro.mapreduce.AvroKeyValueOutputFormat; +import org.apache.hadoop.conf.Configured; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.io.IntWritable; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.io.Text; +import org.apache.hadoop.mapreduce.Job; +import org.apache.hadoop.mapreduce.Mapper; +import org.apache.hadoop.mapreduce.Reducer; +import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; +import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; +import org.apache.hadoop.util.Tool; +import org.apache.hadoop.util.ToolRunner; + +import example.avro.User; + +public class MapReduceColorCount extends Configured implements Tool { + + public static class ColorCountMapper extends + Mapper, NullWritable, Text, IntWritable> { + + @Override + public void map(AvroKey key, NullWritable value, Context context) + throws IOException, InterruptedException { + + CharSequence color = key.datum().getFavoriteColor(); + if (color == null) { + color = "none"; + } + context.write(new Text(color.toString()), new IntWritable(1)); + } + } + + public static class ColorCountReducer extends + Reducer, AvroValue> { + + @Override + public void reduce(Text key, Iterable values, + Context context) throws IOException, InterruptedException { + + int sum = 0; + for 
(IntWritable value : values) { + sum += value.get(); + } + context.write(new AvroKey(key.toString()), new AvroValue(sum)); + } + } + + public int run(String[] args) throws Exception { + if (args.length != 2) { + System.err.println("Usage: MapReduceColorCount "); + return -1; + } + + Job job = new Job(getConf()); + job.setJarByClass(MapReduceColorCount.class); + job.setJobName("Color Count"); + + FileInputFormat.setInputPaths(job, new Path(args[0])); + FileOutputFormat.setOutputPath(job, new Path(args[1])); + + job.setInputFormatClass(AvroKeyInputFormat.class); + job.setMapperClass(ColorCountMapper.class); + AvroJob.setInputKeySchema(job, User.getClassSchema()); + job.setMapOutputKeyClass(Text.class); + job.setMapOutputValueClass(IntWritable.class); + + job.setOutputFormatClass(AvroKeyValueOutputFormat.class); + job.setReducerClass(ColorCountReducer.class); + AvroJob.setOutputKeySchema(job, Schema.create(Schema.Type.STRING)); + AvroJob.setOutputValueSchema(job, Schema.create(Schema.Type.INT)); + + return (job.waitForCompletion(true) ? 0 : 1); + } + + public static void main(String[] args) throws Exception { + int res = ToolRunner.run(new MapReduceColorCount(), args); + System.exit(res); + } +} +``` +ColorCount reads in data files containing *User* records, defined in _examples/user.avsc_, and counts the number of instances of each favorite color. (This example draws inspiration from the canonical _WordCount_ MapReduce application.) This example uses the old MapReduce API. See MapReduceAvroWordCount, found under _doc/examples/mr-example/src/main/java/example/_ to see the new MapReduce API example. 
The User schema is defined as follows: +```json +{"namespace": "example.avro", + "type": "record", + "name": "User", + "fields": [ + {"name": "name", "type": "string"}, + {"name": "favorite_number", "type": ["int", "null"]}, + {"name": "favorite_color", "type": ["string", "null"]} + ] +} +``` +This schema is compiled into the *User* class used by *ColorCount* via the Avro Maven plugin (see _examples/mr-example/pom.xml_ for how this is set up). + +*ColorCountMapper* essentially takes a *User* as input and extracts the User's favorite color, emitting the key-value pair `<favorite color, 1>`. _ColorCountReducer_ then adds up how many occurrences of a particular favorite color were emitted, and outputs the result as a Pair record. These Pairs are serialized to an Avro data file. + +## Running ColorCount +The _ColorCount_ application is provided as a Maven project in the Avro docs under _examples/mr-example_. To build the project, including the code generation of the User schema, run: +```shell +mvn compile +``` +Next, run _GenerateData_ from `examples/mr-example` to create an Avro data file, `input/users.avro`, containing 20 Users with favorite colors chosen randomly from a list: +```shell +mvn exec:java -q -Dexec.mainClass=example.GenerateData +``` +Besides creating the data file, GenerateData prints the JSON representations of the Users generated to stdout, for example: +```json +{"name": "user", "favorite_number": null, "favorite_color": "red"} +{"name": "user", "favorite_number": null, "favorite_color": "green"} +{"name": "user", "favorite_number": null, "favorite_color": "purple"} +{"name": "user", "favorite_number": null, "favorite_color": null} +... +``` +Now we're ready to run ColorCount.
We specify our freshly-generated input folder as the input path and output as our output folder (note that MapReduce will not start a job if the output folder already exists): +```shell +mvn exec:java -q -Dexec.mainClass=example.MapredColorCount -Dexec.args="input output" +``` +Once ColorCount completes, checking the contents of the new output directory should yield the following: +```shell +$ ls output/ +part-00000.avro _SUCCESS +``` +You can check the contents of the generated Avro file using the avro-tools jar: +```shell +$ java -jar /path/to/avro-tools-{{< avro_version >}}.jar tojson output/part-00000.avro +{"value": 3, "key": "blue"} +{"value": 7, "key": "green"} +{"value": 1, "key": "none"} +{"value": 2, "key": "orange"} +{"value": 3, "key": "purple"} +{"value": 2, "key": "red"} +{"value": 2, "key": "yellow"} +``` +Now let's go over the ColorCount example in detail. + +## AvroMapper - org.apache.hadoop.mapred API + +The easiest way to use Avro data files as input to a MapReduce job is to subclass `AvroMapper`. An `AvroMapper` defines a `map` function that takes an Avro datum as input and outputs a key/value pair represented as a Pair record. In the ColorCount example, ColorCountMapper is an AvroMapper that takes a User as input and outputs a `Pair<CharSequence, Integer>`, where the CharSequence key is the user's favorite color and the Integer value is 1. +```java +public static class ColorCountMapper extends AvroMapper> { + @Override + public void map(User user, AvroCollector> collector, Reporter reporter) + throws IOException { + CharSequence color = user.getFavoriteColor(); + // We need this check because the User.favorite_color field has type ["string", "null"] + if (color == null) { + color = "none"; + } + collector.collect(new Pair(color, 1)); + } +} +``` +In order to use our AvroMapper, we must call AvroJob.setMapperClass and AvroJob.setInputSchema.
+```java +AvroJob.setMapperClass(conf, ColorCountMapper.class); +AvroJob.setInputSchema(conf, User.getClassSchema()); +``` +Note that `AvroMapper` does not implement the `Mapper` interface. Under the hood, the specified Avro data files are deserialized into AvroWrappers containing the actual data, which are processed by a Mapper that calls the configured AvroMapper's map function. AvroJob.setInputSchema sets up the relevant configuration parameters needed to make this happen, thus you should not need to call `JobConf.setMapperClass`, `JobConf.setInputFormat`, `JobConf.setMapOutputKeyClass`, `JobConf.setMapOutputValueClass`, or `JobConf.setOutputKeyComparatorClass`. + +## Mapper - org.apache.hadoop.mapreduce API +This document will not go into all the differences between the mapred and mapreduce APIs, but it will describe the main differences. As you can see, ColorCountMapper is now a subclass of the Hadoop Mapper class and is passed an AvroKey as its key. Additionally, the AvroJob method calls were slightly changed. +```java + public static class ColorCountMapper extends + Mapper, NullWritable, Text, IntWritable> { + + @Override + public void map(AvroKey key, NullWritable value, Context context) + throws IOException, InterruptedException { + + CharSequence color = key.datum().getFavoriteColor(); + if (color == null) { + color = "none"; + } + context.write(new Text(color.toString()), new IntWritable(1)); + } + } +``` + +## AvroReducer - org.apache.hadoop.mapred API +Analogously to AvroMapper, an AvroReducer defines a reducer function that takes the key/value types output by an AvroMapper (or any mapper that outputs Pairs) and outputs a key/value pair represented as a Pair record. In the ColorCount example, ColorCountReducer is an AvroReducer that takes the CharSequence key representing a favorite color and the `Iterable<Integer>` representing the counts for that color (they should all be 1 in this example) and adds up the counts.
+```java +public static class ColorCountReducer extends AvroReducer> { + @Override + public void reduce(CharSequence key, Iterable values, + AvroCollector> collector, + Reporter reporter) + throws IOException { + int sum = 0; + for (Integer value : values) { + sum += value; + } + collector.collect(new Pair(key, sum)); + } +} +``` +In order to use our AvroReducer, we must call AvroJob.setReducerClass and AvroJob.setOutputSchema. +```java +AvroJob.setReducerClass(conf, ColorCountReducer.class); +AvroJob.setOutputSchema(conf, Pair.getPairSchema(Schema.create(Type.STRING), + Schema.create(Type.INT))); +``` +Note that _AvroReducer_ does not implement the _Reducer_ interface. The intermediate Pairs output by the mapper are split into _AvroKeys_ and _AvroValues_, which are processed by a Reducer that calls the configured AvroReducer's `reduce` function. `AvroJob.setOutputSchema` sets up the relevant configuration parameters needed to make this happen, thus you should not need to call `JobConf.setReducerClass`, `JobConf.setOutputFormat`, `JobConf.setOutputKeyClass`, `JobConf.setMapOutputKeyClass`, `JobConf.setMapOutputValueClass`, or `JobConf.setOutputKeyComparatorClass`. + +## Reduce - org.apache.hadoop.mapreduce API +As before, we will not detail every difference between the APIs. As with the _Mapper_ change, _ColorCountReducer_ is now a subclass of _Reducer_, and _AvroKey_ and _AvroValue_ are emitted. Additionally, the _AvroJob_ method calls were slightly changed.
+```java + public static class ColorCountReducer extends + Reducer, AvroValue> { + + @Override + public void reduce(Text key, Iterable values, + Context context) throws IOException, InterruptedException { + + int sum = 0; + for (IntWritable value : values) { + sum += value.get(); + } + context.write(new AvroKey(key.toString()), new AvroValue(sum)); + } + } +``` + +## Learning more +The mapred API allows users to mix Avro AvroMappers and AvroReducers with non-Avro Mappers and Reducers and the mapreduce API allows users to input Avro and output non-Avro or vice versa. + +The `org.apache.avro.mapred` package has API documentation, as does the `org.apache.avro.mapreduce` package, which targets the new MapReduce API (`org.apache.hadoop.mapreduce`). Similarly to the mapreduce package, it's possible with the mapred API to implement your own Mappers and Reducers directly using the public classes provided in these libraries. See the `AvroWordCount` application, found under _examples/mr-example/src/main/java/example/AvroWordCount.java_ in the Avro documentation, for an example of implementing a Reducer that outputs Avro data using the old MapReduce API. See the `MapReduceAvroWordCount` application, found under _examples/mr-example/src/main/java/example/MapReduceAvroWordCount.java_ in the Avro documentation, for an example of implementing a Reducer that outputs Avro data using the new MapReduce API. diff --git a/doc/content/en/docs/1.12.0/SASL profile/_index.md b/doc/content/en/docs/1.12.0/SASL profile/_index.md new file mode 100644 index 00000000000..a938310414d --- /dev/null +++ b/doc/content/en/docs/1.12.0/SASL profile/_index.md @@ -0,0 +1,93 @@ +--- +title: "SASL profile" +linkTitle: "SASL profile" +weight: 202 +--- + + + +## Introduction +SASL ([RFC 2222](https://www.ietf.org/rfc/rfc2222.txt)) provides a framework for authentication and security of network protocols. Each protocol that uses SASL is meant to define a SASL profile.
This document provides a SASL profile for connection-based Avro RPC. + +## Overview +SASL negotiation proceeds as a series of message interactions over a connection between a client and server using a selected SASL mechanism. The client starts this negotiation by sending its chosen mechanism name with an initial (possibly empty) message. Negotiation proceeds with the exchange of messages until either side indicates success or failure. The content of the messages is mechanism-specific. If the negotiation succeeds, then the session can proceed over the connection, otherwise it must be abandoned. + +Some mechanisms continue to process session data after negotiation (e.g., encrypting it), while some specify that further session data is transmitted unmodified. + +## Negotiation + +### Commands +Avro SASL negotiation uses four one-byte commands. + +* 0: START Used in a client's initial message. +* 1: CONTINUE Used while negotiation is ongoing. +* 2: FAIL Terminates negotiation unsuccessfully. +* 3: COMPLETE Terminates negotiation successfully. + +The format of a START message is: + +`| 0 | 4-byte mechanism name length | mechanism name | 4-byte payload length | payload data |` + +The format of a CONTINUE message is: + +`| 1 | 4-byte payload length | payload data |` + +The format of a FAIL message is: + +`| 2 | 4-byte message length | UTF-8 message |` + +The format of a COMPLETE message is: + +`| 3 | 4-byte payload length | payload data |` + +### Process +Negotiation is initiated by a client sending a START command containing the client's chosen mechanism name and any mechanism-specific payload data. + +The server and client then interchange some number (possibly zero) of CONTINUE messages. Each message contains payload data that is processed by the security mechanism to generate the next message. + +Once either the client or server send a FAIL message then negotiation has failed. UTF-8-encoded text is included in the failure message. 
Once either a FAIL message has been sent or received, or any other error occurs in the negotiation, further communication on this connection must cease. + +Once either the client or server sends a COMPLETE message then negotiation has completed successfully. Session data may now be transmitted over the connection until it is closed by either side. + +## Session Data +If no SASL QOP (quality of protection) is negotiated, then all subsequent writes to/reads over this connection are written/read unmodified. In particular, messages use Avro [framing](#Message+Framing), and are of the form: + +`| 4-byte frame length | frame data | ... | 4 zero bytes |` + +If a SASL QOP is negotiated, then it must be used by the connection for all subsequent messages. This is done by wrapping each non-empty frame written using the security mechanism and unwrapping each non-empty frame read. The length written in each non-empty frame is the length of the wrapped data. Complete frames must be passed to the security mechanism for unwrapping. Unwrapped data is then passed to the application as the content of the frame. + +If at any point processing fails due to wrapping, unwrapping or framing errors, then all further communication on this connection must cease. + +## Anonymous Mechanism +The SASL anonymous mechanism ([RFC 2245](https://www.ietf.org/rfc/rfc2245.txt)) is quite simple to implement.
In particular, an initial anonymous request may be prefixed by the following static sequence: + +`| 0 | 0009 | ANONYMOUS | 0000 |` + +If a server uses the anonymous mechanism, it should check that the mechanism name in the start message prefixing the first request received is 'ANONYMOUS', then simply prefix its initial response with a COMPLETE message of: + +`| 3 | 0000 |` + +If an anonymous server receives some other mechanism name, then it may respond with a FAIL message as simple as: + +`| 2 | 0000 |` + +Note that the anonymous mechanism need add no additional round-trip messages between client and server. The START message can be piggybacked on the initial request and the COMPLETE or FAIL message can be piggybacked on the initial response. diff --git a/doc/content/en/docs/1.12.0/Specification/_index.md b/doc/content/en/docs/1.12.0/Specification/_index.md new file mode 100644 index 00000000000..75eda7b7f62 --- /dev/null +++ b/doc/content/en/docs/1.12.0/Specification/_index.md @@ -0,0 +1,896 @@ +--- +title: "Specification" +linkTitle: "Specification" +weight: 4 +date: 2021-10-25 +aliases: +- spec.html +--- + + + +## Introduction +This document defines Apache Avro. It is intended to be the authoritative specification. Implementations of Avro must adhere to this document. + +## Schema Declaration {#schema-declaration} +A Schema is represented in [JSON](https://www.json.org/) by one of: + +* A JSON string, naming a defined type. +* A JSON object, of the form: +```js +{"type": "typeName", ...attributes...} +``` +where _typeName_ is either a primitive or derived type name, as defined below. Attributes not defined in this document are permitted as metadata, but must not affect the format of serialized data. +* A JSON array, representing a union of embedded types.
+ +## Primitive Types +The set of primitive type names is: + +* _null_: no value +* _boolean_: a binary value +* _int_: 32-bit signed integer +* _long_: 64-bit signed integer +* _float_: single precision (32-bit) IEEE 754 floating-point number +* _double_: double precision (64-bit) IEEE 754 floating-point number +* _bytes_: sequence of 8-bit unsigned bytes +* _string_: unicode character sequence + +Primitive types have no specified attributes. + +Primitive type names are also defined type names. Thus, for example, the schema "string" is equivalent to: +```json +{"type": "string"} +``` + +## Complex Types +Avro supports six kinds of complex types: _records_, _enums_, _arrays_, _maps_, _unions_ and _fixed_. + +### Records {#schema-record} +Records use the type name "record" and support the following attributes: + +* _name_: a JSON string providing the name of the record (required). +* _namespace_, a JSON string that qualifies the name (optional); +* _doc_: a JSON string providing documentation to the user of this schema (optional). +* _aliases_: a JSON array of strings, providing alternate names for this record (optional). +* _fields_: a JSON array, listing fields (required). Each field is a JSON object with the following attributes: + * _name_: a JSON string providing the name of the field (required), and + * _doc_: a JSON string describing this field for users (optional). + * _type_: a [schema]({{< ref "#schema-declaration" >}} "Schema declaration"), as defined above + * _order_: specifies how this field impacts sort ordering of this record (optional). Valid values are "ascending" (the default), "descending", or "ignore". For more details on how this is used, see the sort order section below. + * _aliases_: a JSON array of strings, providing alternate names for this field (optional). + * _default_: A default value for this field, only used when reading instances that lack the field for schema evolution purposes. 
The presence of a default value does not make the field optional at encoding time. Permitted values depend on the field's schema type, according to the table below. Default values for union fields correspond to the first schema that matches in the union. Default values for bytes and fixed fields are JSON strings, where Unicode code points 0-255 are mapped to unsigned 8-bit byte values 0-255. Avro encodes a field even if its value is equal to its default. + +*field default values* + +| **avro type** | **json type** | **example** | +|---------------|----------------|-------------| +| null | null | `null` | +| boolean | boolean | `true` | +| int,long | integer | `1` | +| float,double | number | `1.1` | +| bytes | string | `"\u00FF"` | +| string | string | `"foo"` | +| record | object | `{"a": 1}` | +| enum | string | `"FOO"` | +| array | array | `[1]` | +| map | object | `{"a": 1}` | +| fixed | string | `"\u00ff"` | + +For example, a linked-list of 64-bit values may be defined with: +```jsonc +{ + "type": "record", + "name": "LongList", + "aliases": ["LinkedLongs"], // old name for this + "fields" : [ + {"name": "value", "type": "long"}, // each element has a long + {"name": "next", "type": ["null", "LongList"]} // optional next element + ] +} +``` + +### Enums +Enums use the type name "enum" and support the following attributes: + +* _name_: a JSON string providing the name of the enum (required). +* _namespace_, a JSON string that qualifies the name (optional); +* _aliases_: a JSON array of strings, providing alternate names for this enum (optional). +* _doc_: a JSON string providing documentation to the user of this schema (optional). +* _symbols_: a JSON array, listing symbols, as JSON strings (required). All symbols in an enum must be unique; duplicates are prohibited. Every symbol must match the regular expression [A-Za-z_][A-Za-z0-9_]* (the same requirement as for [names]({{< ref "#names" >}} "Names")). 
+* _default_: A default value for this enumeration, used during resolution when the reader encounters a symbol from the writer that isn't defined in the reader's schema (optional). The value provided here must be a JSON string that's a member of the symbols array. See documentation on schema resolution for how this gets used. + +For example, playing card suits might be defined with: +```json +{ + "type": "enum", + "name": "Suit", + "symbols" : ["SPADES", "HEARTS", "DIAMONDS", "CLUBS"] +} +``` + +### Arrays +Arrays use the type name "array" and support a single attribute: + +* _items_: the schema of the array's items. + +For example, an array of strings is declared with: +```json +{ + "type": "array", + "items" : "string", + "default": [] +} +``` + +### Maps +Maps use the type name "map" and support one attribute: + +* _values_: the schema of the map's values. + +Map keys are assumed to be strings. + +For example, a map from string to long is declared with: +```json +{ + "type": "map", + "values" : "long", + "default": {} +} +``` + +### Unions +Unions, as mentioned above, are represented using JSON arrays. For example, `["null", "string"]` declares a schema which may be either a null or string. + +(Note that when a [default value]({{< ref "#schema-record" >}} "Schema record") is specified for a record field whose type is a union, the type of the default value must match with one element of the union.) + +Unions may not contain more than one schema with the same type, except for the named types record, fixed and enum. For example, unions containing two array types or two map types are not permitted, but two types with different names are permitted. (Names permit efficient resolution when reading and writing unions.) + +Unions may not immediately contain other unions. + +### Fixed +Fixed uses the type name "fixed" and supports the following attributes: + +* _name_: a string naming this fixed (required).
+* _namespace_, a string that qualifies the name (optional); +* _aliases_: a JSON array of strings, providing alternate names for this fixed (optional). +* _size_: an integer, specifying the number of bytes per value (required). + +For example, a 16-byte quantity may be declared with: +```json +{"type": "fixed", "size": 16, "name": "md5"} +``` + +### Names +Records, enums and fixed are named types. Each has a fullname that is composed of two parts: a name and a namespace, separated by a dot. Equality of names is defined on the fullname – it is an error to specify two different types with the same name. + +Record fields and enum symbols have names as well (but no namespace). Equality of field names and enum symbols is defined within their scope (the record/enum that defines them). It is an error to define multiple fields or enum symbols with the same name in a single type. Fields and enum symbols across scopes are never equal, so field names and enum symbols can be reused in a different type. + +The name portion of the fullname of named types, record field names, and enum symbols must: + +* start with [A-Za-z_] +* subsequently contain only [A-Za-z0-9_] + +A namespace is a dot-separated sequence of such names. The empty string may also be used as a namespace to indicate the null namespace. Equality of names (including field names and enum symbols) as well as fullnames is case-sensitive. + +The null namespace may not be used in a dot-separated sequence of names.
So the grammar for a namespace is: +``` + <empty> | <name>[(<dot><name>)*] +``` + +In record, enum and fixed definitions, the fullname is determined according to the algorithm below the example: + +``` +{ + "type": "record", + "name": "Example", + "doc": "A simple name (attribute) and no namespace attribute: use the null namespace (\"\"); the fullname is 'Example'.", + "fields": [ + { + "name": "inheritNull", + "type": { + "type": "enum", + "name": "Simple", + "doc": "A simple name (attribute) and no namespace attribute: inherit the null namespace of the enclosing type 'Example'. The fullname is 'Simple'.", + "symbols": ["a", "b"] + } + }, { + "name": "explicitNamespace", + "type": { + "type": "fixed", + "name": "Simple", + "namespace": "explicit", + "doc": "A simple name (attribute) and a namespace (attribute); the fullname is 'explicit.Simple' (this is a different type than of the 'inheritNull' field).", + "size": 12 + } + }, { + "name": "fullName", + "type": { + "type": "record", + "name": "a.full.Name", + "namespace": "ignored", + "doc": "A name attribute with a fullname, so the namespace attribute is ignored. The fullname is 'a.full.Name', and the namespace is 'a.full'.", + "fields": [ + { + "name": "inheritNamespace", + "type": { + "type": "enum", + "name": "Understanding", + "doc": "A simple name (attribute) and no namespace attribute: inherit the namespace of the enclosing type 'a.full.Name'. The fullname is 'a.full.Understanding'.", + "symbols": ["d", "e"] + } + } + ] + } + } + ] +} +``` + +The fullname of a record, enum or fixed definition is determined by the required `name` and optional `namespace` attributes like this: + +* A fullname is specified. If the name specified contains a dot, then it is assumed to be a fullname, and any namespace also specified is ignored. For example, use "name": "org.foo.X" to indicate the fullname org.foo.X. +* A simple name (a name that contains no dots) and namespace are both specified.
For example, one might use "name": "X", "namespace": "org.foo" to indicate the fullname org.foo.X. +* A simple name only is specified (a name that contains no dots). In this case the namespace is taken from the most tightly enclosing named schema or protocol, and the fullname is constructed from that namespace and the name. For example, if "name": "X" is specified, and this occurs within a field of the record definition of org.foo.Y, then the fullname is org.foo.X. This also happens if there is no enclosing namespace (i.e., the enclosing schema definition has the null namespace). + +References to previously defined names are as in the latter two cases above: if they contain a dot they are a fullname, if they do not contain a dot, the namespace is the namespace of the enclosing definition. + +Primitive type names (`null`, `boolean`, `int`, `long`, `float`, `double`, `bytes`, `string`) have no namespace and their names may not be defined in any namespace. + +Complex types (`record`, `enum`, `array`, `map`, `fixed`) have no namespace, but their names (as well as `union`) are permitted to be reused as type names. This can be confusing to the human reader, but is always unambiguous for binary serialization. Due to the limitations of JSON encoding, it is a best practice to use a namespace when using these names. + +A schema or protocol may not contain multiple definitions of a fullname. Further, a name must be defined before it is used ("before" in the depth-first, left-to-right traversal of the JSON parse tree, where the types attribute of a protocol is always deemed to come "before" the messages attribute.) + +### Aliases +Named types and fields may have aliases. An implementation may optionally use aliases to map a writer's schema to the reader's. This facilitates both schema evolution as well as processing disparate datasets. + +Aliases function by re-writing the writer's schema using aliases from the reader's schema. 
For example, if the writer's schema was named "Foo" and the reader's schema is named "Bar" and has an alias of "Foo", then the implementation would act as though "Foo" were named "Bar" when reading. Similarly, if data was written as a record with a field named "x" and is read as a record with a field named "y" with alias "x", then the implementation would act as though "x" were named "y" when reading. + +A type alias may be specified either as a fully namespace-qualified, or relative to the namespace of the name it is an alias for. For example, if a type named "a.b" has aliases of "c" and "x.y", then the fully qualified names of its aliases are "a.c" and "x.y". + +Aliases are alternative names, and thus subject to the same uniqueness constraints as names. Aliases should be valid names, but this is not required: any string is accepted as an alias. When aliases are used "to map a writer's schema to the reader's" (see above), this allows schema evolution to correct illegal names in old schemata. + +## Fixing an invalid, but previously accepted, schema +Over time, rules and validations on schemas have changed. It is therefore possible that a schema used to work with an older version of Avro, but now fails to parse. + +This can have several reasons, as listed below. Each reason also describes a fix, which can be applied using [schema resolution]({{< ref "#schema-resolution" >}}): you fix the problems in the schema in a way that is compatible, and then you can use the new schema to read the old data. + +### Invalid names +Invalid names of types and fields can be corrected by renaming (using an [alias]({{< ref "#aliases" >}})). This works for simple names, namespaces and fullnames. + +This fix is twofold: first, you add the invalid name as an alias to the type/field. Then, you change the name to any valid name. + +### Invalid defaults +Default values are only used to fill in missing data when reading. Invalid defaults create invalid values in these cases. 
The fix is to correct the default values. + + +## Data Serialization and Deserialization +Binary encoded Avro data does not include type information or field names. The benefit is that the serialized data is small, but as a result a schema must always be used in order to read Avro data correctly. The best way to ensure that the schema is structurally identical to the one used to write the data is to use the exact same schema. + +Therefore, files or systems that store Avro data should always include the writer's schema for that data. Avro-based remote procedure call (RPC) systems must also guarantee that remote recipients of data have a copy of the schema used to write that data. In general, it is advisable that any reader of Avro data should use a schema that is the same (as defined more fully in [Parsing Canonical Form for Schemas]({{< ref "#parsing-canonical-form-for-schemas" >}} "Parsing Canonical Form for Schemas")) as the schema that was used to write the data in order to deserialize it correctly. Deserializing data into a newer schema is accomplished by specifying an additional schema, the results of which are described in [Schema Resolution]({{< ref "#schema-resolution" >}}). + +In general, both serialization and deserialization proceed as a depth-first, left-to-right traversal of the schema, serializing or deserializing primitive types as they are encountered. Therefore, it is possible, though not advisable, to read Avro data with a schema that does not have the same Parsing Canonical Form as the schema with which the data was written. In order for this to work, the serialized primitive values must be compatible, in order value by value, with the items in the deserialization schema. For example, int and long are always serialized the same way, so an int could be deserialized as a long. Since the compatibility of two schemas depends on both the data and the serialization format (eg. binary is more permissive than JSON because JSON includes field names, eg. 
a long that is too large will overflow an int), it is simpler and more reliable to use schemas with identical Parsing Canonical Form. + +### Encodings +Avro specifies two serialization encodings: binary and JSON. Most applications will use the binary encoding, as it is smaller and faster. But, for debugging and web-based applications, the JSON encoding may sometimes be appropriate. + +### Binary Encoding {#binary-encoding} +Binary encoding does not include field names, self-contained information about the types of individual bytes, nor field or record separators. Therefore readers are wholly reliant on the schema used when the data was encoded. + +#### Primitive Types +Primitive types are encoded in binary as follows: + +* _null_ is written as zero bytes. +* a _boolean_ is written as a single byte whose value is either 0 (false) or 1 (true). +* _int_ and _long_ values are written using [variable-length](https://lucene.apache.org/java/3_5_0/fileformats.html#VInt) [zig-zag](https://code.google.com/apis/protocolbuffers/docs/encoding.html#types) coding. Some examples: + +| *value* | *hex* | +|---|---| +| 0 | 00 | +|-1 | 01 | +| 1 | 02 | +|-2 | 03 | +| 2 | 04 | +|...|...| +|-64 | 7f | +|64 | 80 01| +|...|...| + +* a _float_ is written as 4 bytes. The float is converted into a 32-bit integer using a method equivalent to Java's [floatToRawIntBits](https://docs.oracle.com/javase/8/docs/api/java/lang/Float.html#floatToRawIntBits-float-) and then encoded in little-endian format. +* a _double_ is written as 8 bytes. The double is converted into a 64-bit integer using a method equivalent to Java's [doubleToRawLongBits](https://docs.oracle.com/javase/8/docs/api/java/lang/Double.html#doubleToRawLongBits-double-) and then encoded in little-endian format. +* _bytes_ are encoded as a long followed by that many bytes of data. +* a _string_ is encoded as a long followed by that many bytes of UTF-8 encoded character data. 
+For example, the three-character string "foo" would be encoded as the long value 3 (encoded as hex 06) followed by the UTF-8 encoding of 'f', 'o', and 'o' (the hex bytes 66 6f 6f): +``` +06 66 6f 6f +``` + +### Complex Types +Complex types are encoded in binary as follows: + +#### Records +A record is encoded by encoding the values of its fields in the order that they are declared. In other words, a record is encoded as just the concatenation of the encodings of its fields. Field values are encoded per their schema. + +For example, the record schema +```json +{ + "type": "record", + "name": "test", + "fields" : [ + {"name": "a", "type": "long"}, + {"name": "b", "type": "string"} + ] +} +``` + +An instance of this record whose `a` field has value 27 (encoded as hex 36) and whose `b` field has value "foo" (encoded as hex bytes 06 66 6f 6f), would be encoded simply as the concatenation of these, namely the hex byte sequence: +``` +36 06 66 6f 6f +``` + +#### Enums +An enum is encoded by an int, representing the zero-based position of the symbol in the schema. + +For example, consider the enum: +```json +{"type": "enum", "name": "Foo", "symbols": ["A", "B", "C", "D"] } +``` + +This would be encoded by an int between zero and three, with zero indicating "A", and 3 indicating "D". + +#### Arrays +Arrays are encoded as a series of blocks. Each block consists of a long count value, followed by that many array items. A block with count zero indicates the end of the array. Each item is encoded per the array's item schema. + +If a block's count is negative, its absolute value is used, and the count is followed immediately by a long block size indicating the number of bytes in the block. This block size permits fast skipping through data, e.g., when projecting a record to a subset of its fields.
+ +For example, the array schema +```json +{"type": "array", "items": "long"} +``` +an array containing the items 3 and 27 could be encoded as the long value 2 (encoded as hex 04) followed by long values 3 and 27 (encoded as hex 06 36) terminated by zero: +``` +04 06 36 00 +``` + +The blocked representation permits one to read and write arrays larger than can be buffered in memory, since one can start writing items without knowing the full length of the array. + +#### Maps {#schema-maps} +Maps are encoded as a series of _blocks_. Each block consists of a `long` _count_ value, followed by that many key/value pairs. A block with count zero indicates the end of the map. Each item is encoded per the map's value schema. + +If a block's count is negative, its absolute value is used, and the count is followed immediately by a `long` block size indicating the number of bytes in the block. This block size permits fast skipping through data, e.g., when projecting a record to a subset of its fields. + +The blocked representation permits one to read and write maps larger than can be buffered in memory, since one can start writing items without knowing the full length of the map. + +#### Unions +A union is encoded by first writing an `int` value indicating the zero-based position within the union of the schema of its value. The value is then encoded per the indicated schema within the union. + +For example, the union schema `["null","string"]` would encode: + +* _null_ as zero (the index of "null" in the union): +`00` +* the string "a" as one (the index of "string" in the union, 1, encoded as hex 02), followed by the serialized string: +`02 02 61` +NOTE: Currently for C/C++ implementations, the positions are practically an int, but theoretically a long. In reality, we don't expect unions with 215M members + +#### Fixed +Fixed instances are encoded using the number of bytes declared in the schema. 
+ +### JSON Encoding +Except for unions, the JSON encoding is the same as is used to encode [field default values]({{< ref "#schema-record" >}}). + +The value of a union is encoded in JSON as follows: + +* if its type is _null_, then it is encoded as a JSON _null_; +* otherwise it is encoded as a JSON object with one name/value pair whose name is the type's name and whose value is the recursively encoded value. For Avro's named types (record, fixed or enum) the user-specified name is used, for other types the type name is used. + +For example, the union schema `["null","string","Foo"]`, where Foo is a record name, would encode: + +* _null_ as _null_; +* the string "a" as `{"string": "a"}` and +* a Foo instance as `{"Foo": {...}}`, where `{...}` indicates the JSON encoding of a Foo instance. + +Note that the original schema is still required to correctly process JSON-encoded data. For example, the JSON encoding does not distinguish between _int_ and _long_, _float_ and _double_, records and maps, enums and strings, etc. + +### Single-object encoding +In some situations a single Avro serialized object is to be stored for a longer period of time. One very common example is storing Avro records for several weeks in an [Apache Kafka](https://kafka.apache.org/) topic. + +In the period after a schema change this persistence system will contain records that have been written with different schemas. So the need arises to know which schema was used to write a record to support schema evolution correctly. In most cases the schema itself is too large to include in the message, so this binary wrapper format supports the use case more effectively. + +#### Single object encoding specification +Single Avro objects are encoded as follows: + +1. A two-byte marker, `C3 01`, to show that the message is Avro and uses this single-record format (version 1). +1. 
The 8-byte little-endian CRC-64-AVRO [fingerprint]({{< ref "#schema-fingerprints" >}} "Schema fingerprints") of the object's schema. +1. The Avro object encoded using [Avro's binary encoding]({{< ref "#binary-encoding" >}}). + +Implementations use the 2-byte marker to determine whether a payload is Avro. This check helps avoid expensive lookups that resolve the schema from a fingerprint, when the message is not an encoded Avro payload. + +## Sort Order +Avro defines a standard sort order for data. This permits data written by one system to be efficiently sorted by another system. This can be an important optimization, as sort order comparisons are sometimes the most frequent per-object operation. Note also that Avro binary-encoded data can be efficiently ordered without deserializing it to objects. + +Data items may only be compared if they have identical schemas. Pairwise comparisons are implemented recursively with a depth-first, left-to-right traversal of the schema. The first mismatch encountered determines the order of the items. + +Two items with the same schema are compared according to the following rules. + +* _null_ data is always equal. +* _boolean_ data is ordered with false before true. +* _int_, _long_, _float_ and _double_ data is ordered by ascending numeric value. +* _bytes_ and fixed data are compared lexicographically by unsigned 8-bit values. +* _string_ data is compared lexicographically by Unicode code point. Note that since UTF-8 is used as the binary encoding for strings, sorting of bytes and string binary data is identical. +* _array_ data is compared lexicographically by element. +* _enum_ data is ordered by the symbol's position in the enum schema. For example, an enum whose symbols are `["z", "a"]` would sort "z" values before "a" values. +* _union_ data is first ordered by the branch within the union, and, within that, by the type of the branch. 
For example, an `["int", "string"]` union would order all int values before all string values, with the ints and strings themselves ordered as defined above. +* _record_ data is ordered lexicographically by field. If a field specifies that its order is: + * "ascending", then the order of its values is unaltered. + * "descending", then the order of its values is reversed. + * "ignore", then its values are ignored when sorting. +* _map_ data may not be compared. It is an error to attempt to compare data containing maps unless those maps are in an `"order":"ignore"` record field. + +## Object Container Files +Avro includes a simple object container file format. A file has a schema, and all objects stored in the file must be written according to that schema, using binary encoding. Objects are stored in blocks that may be compressed. Synchronization markers are used between blocks to permit efficient splitting of files for MapReduce processing. + +Files may include arbitrary user-specified metadata. + +A file consists of: + +* A file header, followed by +* one or more file data blocks. + +A file header consists of: + +* Four bytes, ASCII 'O', 'b', 'j', followed by 1. +* file metadata, including the schema. +* The 16-byte, randomly-generated sync marker for this file. + +File metadata is written as if defined by the following [map]({{< ref "#schema-maps" >}}) schema: +```json +{"type": "map", "values": "bytes"} +``` +All metadata properties that start with "avro." are reserved. The following file metadata properties are currently used: + +* **avro.schema** contains the schema of objects stored in the file, as JSON data (required). +* **avro.codec** the name of the compression codec used to compress blocks, as a string. Implementations are required to support the following codecs: "null" and "deflate". If codec is absent, it is assumed to be "null". The codecs are described in more detail below.
+ +A file header is thus described by the following schema: +```json +{"type": "record", "name": "org.apache.avro.file.Header", + "fields" : [ + {"name": "magic", "type": {"type": "fixed", "name": "Magic", "size": 4}}, + {"name": "meta", "type": {"type": "map", "values": "bytes"}}, + {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}} + ] +} +``` + +A file data block consists of: + +* A long indicating the count of objects in this block. +* A long indicating the size in bytes of the serialized objects in the current block, after any codec is applied. +* The serialized objects. If a codec is specified, this is compressed by that codec. +* The file's 16-byte sync marker. + +A file data block is thus described by the following schema: +```json +{"type": "record", "name": "org.apache.avro.file.DataBlock", + "fields" : [ + {"name": "count", "type": "long"}, + {"name": "data", "type": "bytes"}, + {"name": "sync", "type": {"type": "fixed", "name": "Sync", "size": 16}} + ] +} +``` + +Each block's binary data can be efficiently extracted or skipped without deserializing the contents. The combination of block size, object counts, and sync markers enables detection of corrupt blocks and helps ensure data integrity. + +### Required Codecs + +_null_ + +The "null" codec simply passes through data uncompressed. + +_deflate_ + +The "deflate" codec writes the data block using the deflate algorithm as specified in [RFC 1951](https://www.isi.edu/in-notes/rfc1951.txt), and typically implemented using the zlib library. Note that this format (unlike the "zlib format" in RFC 1950) does not have a checksum. + +### Optional Codecs +_bzip2_ + +The "bzip2" codec uses the [bzip2](https://sourceware.org/bzip2/) compression library. + +_snappy_ + +The "snappy" codec uses Google's [Snappy](https://code.google.com/p/snappy/) compression library. Each compressed block is followed by the 4-byte, big-endian CRC32 checksum of the uncompressed data in the block.
+ +_xz_ + +The "xz" codec uses the [XZ](https://tukaani.org/xz/) compression library. + +_zstandard_ + +The "zstandard" codec uses Facebook's [Zstandard](https://facebook.github.io/zstd/) compression library. + +### Protocol Declaration +Avro protocols describe RPC interfaces. Like schemas, they are defined with JSON text. + +A protocol is a JSON object with the following attributes: + +* _protocol_, a string, the name of the protocol (required); +* _namespace_, an optional string that qualifies the name (optional); +* _doc_, an optional string describing this protocol; +* _types_, an optional list of definitions of named types (records, enums, fixed and errors). An error definition is just like a record definition except it uses "error" instead of "record". Note that forward references to named types are not permitted. +* _messages_, an optional JSON object whose keys are message names and whose values are objects whose attributes are described below. No two messages may have the same name. + +The name and namespace qualification rules defined for schema objects apply to protocols as well. + +### Messages +A message has attributes: + +* a _doc_, an optional description of the message, +* a _request_, a list of named, typed parameter schemas (this has the same form as the fields of a record declaration); +* a _response_ schema; +* an optional union of declared error schemas. The effective union has "string" prepended to the declared union, to permit transmission of undeclared "system" errors. For example, if the declared error union is `["AccessError"]`, then the effective union is `["string", "AccessError"]`. When no errors are declared, the effective error union is `["string"]`. Errors are serialized using the effective union; however, a protocol's JSON declaration contains only the declared union. +* an optional one-way boolean parameter. + +A request parameter list is processed equivalently to an anonymous record. 
Since record field lists may vary between reader and writer, request parameters may also differ between the caller and responder, and such differences are resolved in the same manner as record field differences. + +The one-way parameter may only be true when the response type is `"null"` and no errors are listed. + +### Sample Protocol +For example, one may define a simple HelloWorld protocol with: +```json +{ + "namespace": "com.acme", + "protocol": "HelloWorld", + "doc": "Protocol Greetings", + + "types": [ + {"name": "Greeting", "type": "record", "fields": [ + {"name": "message", "type": "string"}]}, + {"name": "Curse", "type": "error", "fields": [ + {"name": "message", "type": "string"}]} + ], + + "messages": { + "hello": { + "doc": "Say hello.", + "request": [{"name": "greeting", "type": "Greeting" }], + "response": "Greeting", + "errors": ["Curse"] + } + } +} +``` + +## Protocol Wire Format + +### Message Transport +Messages may be transmitted via different transport mechanisms. + +To the transport, a _message_ is an opaque byte sequence. + +A transport is a system that supports: + +* **transmission of request messages** +* **receipt of corresponding response messages** +Servers may send a response message back to the client corresponding to a request message. The mechanism of correspondence is transport-specific. For example, in HTTP it is implicit, since HTTP directly supports requests and responses. But a transport that multiplexes many client threads over a single socket would need to tag messages with unique identifiers. + +Transports may be either stateless or stateful. In a stateless transport, messaging assumes no established connection state, while stateful transports establish connections that may be used for multiple messages. This distinction is discussed further in the [handshake](#handshake) section below. 
+ +#### HTTP as Transport +When [HTTP](https://www.w3.org/Protocols/rfc2616/rfc2616.html) is used as a transport, each Avro message exchange is an HTTP request/response pair. All messages of an Avro protocol should share a single URL at an HTTP server. Other protocols may also use that URL. Both normal and error Avro response messages should use the 200 (OK) response code. The chunked encoding may be used for requests and responses, but, regardless, the Avro request and response are the entire content of an HTTP request and response. The HTTP Content-Type of requests and responses should be specified as "avro/binary". Requests should be made using the POST method. + +HTTP is used by Avro as a stateless transport. + +### Message Framing +Avro messages are _framed_ as a list of buffers. + +Framing is a layer between messages and the transport. It exists to optimize certain operations. + +The format of framed message data is: + +* a series of buffers, where each buffer consists of: + * a four-byte, big-endian _buffer length_, followed by + * that many bytes of _buffer_ data. +* a message is always terminated by a zero-length buffer. + +Framing is transparent to request and response message formats (described below). Any message may be presented as a single buffer or multiple buffers. + +Framing can permit readers to more efficiently get different buffers from different sources and for writers to more efficiently store different buffers to different destinations. In particular, it can reduce the number of times large binary objects are copied. For example, if an RPC parameter consists of a megabyte of file data, that data can be copied directly to a socket from a file descriptor, and, on the other end, it could be written directly to a file descriptor, never entering user space. + +A simple, recommended, framing policy is for writers to create a new segment whenever a single binary object is written that is larger than a normal output buffer.
Small objects are then appended in buffers, while larger objects are written as their own buffers. When a reader then tries to read a large object the runtime can hand it an entire buffer directly, without having to copy it. + +### Handshake +The purpose of the handshake is to ensure that the client and the server have each other's protocol definition, so that the client can correctly deserialize responses, and the server can correctly deserialize requests. Both clients and servers should maintain a cache of recently seen protocols, so that, in most cases, a handshake will be completed without extra round-trip network exchanges or the transmission of full protocol text. + +RPC requests and responses may not be processed until a handshake has been completed. With a stateless transport, all requests and responses are prefixed by handshakes. With a stateful transport, handshakes are only attached to requests and responses until a successful handshake response has been returned over a connection. After this, request and response payloads are sent without handshakes for the lifetime of that connection. 
+ +The handshake process uses the following record schemas: +```json +{ + "type": "record", + "name": "HandshakeRequest", "namespace":"org.apache.avro.ipc", + "fields": [ + {"name": "clientHash", + "type": {"type": "fixed", "name": "MD5", "size": 16}}, + {"name": "clientProtocol", "type": ["null", "string"]}, + {"name": "serverHash", "type": "MD5"}, + {"name": "meta", "type": ["null", {"type": "map", "values": "bytes"}]} + ] +} +{ + "type": "record", + "name": "HandshakeResponse", "namespace": "org.apache.avro.ipc", + "fields": [ + {"name": "match", + "type": {"type": "enum", "name": "HandshakeMatch", + "symbols": ["BOTH", "CLIENT", "NONE"]}}, + {"name": "serverProtocol", + "type": ["null", "string"]}, + {"name": "serverHash", + "type": ["null", {"type": "fixed", "name": "MD5", "size": 16}]}, + {"name": "meta", + "type": ["null", {"type": "map", "values": "bytes"}]} + ] +} +``` + +* A client first prefixes each request with a `HandshakeRequest` containing just the hash of its protocol and of the server's protocol (`clientHash!=null, clientProtocol=null, serverHash!=null`), where the hashes are 128-bit MD5 hashes of the JSON protocol text. If a client has never connected to a given server, it sends its hash as a guess of the server's hash, otherwise it sends the hash that it previously obtained from this server. +The server responds with a HandshakeResponse containing one of: + * `match=BOTH, serverProtocol=null, serverHash=null` if the client sent the valid hash of the server's protocol and the server knows what protocol corresponds to the client's hash. In this case, the request is complete and the response data immediately follows the HandshakeResponse. + * `match=CLIENT, serverProtocol!=null, serverHash!=null` if the server has previously seen the client's protocol, but the client sent an incorrect hash of the server's protocol. The request is complete and the response data immediately follows the HandshakeResponse. 
The client must use the returned protocol to process the response and should also cache that protocol and its hash for future interactions with this server. + * `match=NONE` if the server has not previously seen the client's protocol. The serverHash and serverProtocol may also be non-null if the server's protocol hash was incorrect. +In this case the client must then re-submit its request with its protocol text (`clientHash!=null, clientProtocol!=null, serverHash!=null`) and the server should respond with a successful match (match=BOTH, serverProtocol=null, serverHash=null) as above. + +The meta field is reserved for future handshake enhancements. + +### Call Format +A _call_ consists of a request message paired with its resulting response or error message. Requests and responses contain extensible metadata, and both kinds of messages are framed as described above. + +The format of a call request is: + +* _request metadata_, a map with values of type bytes +* the _message name_, an Avro string, followed by +* the _message parameters_. Parameters are serialized according to the message's request declaration. +When the empty string is used as a message name a server should ignore the parameters and return an empty response. A client may use this to ping a server or to perform a handshake without sending a protocol message. + +When a message is declared one-way and a stateful connection has been established by a successful handshake response, no response data is sent. Otherwise the format of the call response is: + +* _response metadata_, a map with values of type bytes +* a one-byte error _flag_ boolean, followed by either: + * if the error flag is false, the message _response_, serialized per the message's response schema. + * if the error flag is true, the _error_, serialized per the message's effective error union schema. 
+ +### Schema Resolution {#schema-resolution} +A reader of Avro data, whether from an RPC or a file, can always parse that data because the original schema must be provided along with the data. However, the reader may be programmed to read data into a different schema. For example, if the data was written with a different version of the software than it is read, then fields may have been added or removed from records. This section specifies how such schema differences should be resolved. + +We refer to the schema used to write the data as the writer's schema, and the schema that the application expects as the reader's schema. Differences between these should be resolved as follows: + +* It is an error if the two schemas do not _match_. +To match, one of the following must hold: + * both schemas are arrays whose item types match + * both schemas are maps whose value types match + * both schemas are enums whose (unqualified) names match + * both schemas are fixed whose sizes and (unqualified) names match + * both schemas are records with the same (unqualified) name + * either schema is a union + * both schemas have the same primitive type + * the writer's schema may be promoted to the reader's as follows: + * int is promotable to long, float, or double + * long is promotable to float or double + * float is promotable to double + * string is promotable to bytes + * bytes is promotable to string +* **if both are records**: + * the ordering of fields may be different: fields are matched by name. + * schemas for fields with the same name in both records are resolved recursively. + * if the writer's record contains a field with a name not present in the reader's record, the writer's value for that field is ignored. + * if the reader's record schema has a field that contains a default value, and writer's schema does not have a field with the same name, then the reader should use the default value from its field.
+ * if the reader's record schema has a field with no default value, and writer's schema does not have a field with the same name, an error is signalled. +* **if both are enums**: +If the writer's symbol is not present in the reader's enum and the reader has a default value, then that value is used, otherwise an error is signalled. + +* **if both are arrays**: +This resolution algorithm is applied recursively to the reader's and writer's array item schemas. + +* **if both are maps**: +This resolution algorithm is applied recursively to the reader's and writer's value schemas. + +* **if both are unions**: +The first schema in the reader's union that matches the selected writer's union schema is recursively resolved against it. If none match, an error is signalled. + +* **if reader's is a union, but writer's is not** +The first schema in the reader's union that matches the writer's schema is recursively resolved against it. If none match, an error is signalled. + +* **if writer's is a union, but reader's is not** +If the reader's schema matches the selected writer's schema, it is recursively resolved against it. If they do not match, an error is signalled. + +A schema's _doc_ fields are ignored for the purposes of schema resolution. Hence, the _doc_ portion of a schema may be dropped at serialization. + +### Parsing Canonical Form for Schemas {#parsing-canonical-form-for-schemas} +One of the defining characteristics of Avro is that a reader must use the schema used by the writer of the data in order to know how to read the data. This assumption results in a data format that's compact and also amenable to many forms of schema evolution. However, the specification so far has not defined what it means for the reader to have the "same" schema as the writer. Does the schema need to be textually identical? Well, clearly adding or removing some whitespace to a JSON expression does not change its meaning.
At the same time, reordering the fields of records clearly does change the meaning. So what does it mean for a reader to have "the same" schema as a writer? + +Parsing Canonical Form is a transformation of a writer's schema that lets us define what it means for two schemas to be "the same" for the purpose of reading data written against the schema. It is called Parsing Canonical Form because the transformations strip away parts of the schema, like "doc" attributes, that are irrelevant to readers trying to parse incoming data. It is called Canonical Form because the transformations normalize the JSON text (such as the order of attributes) in a way that eliminates unimportant differences between schemas. If the Parsing Canonical Forms of two different schemas are textually equal, then those schemas are "the same" as far as any reader is concerned, i.e., there is no serialized data that would allow a reader to distinguish data generated by a writer using one of the original schemas from data generated by a writer using the other original schema. (We sketch a proof of this property in a companion document.) + +The next subsection specifies the transformations that define Parsing Canonical Form. But with a well-defined canonical form, it can be convenient to go one step further, transforming these canonical forms into simple integers ("fingerprints") that can be used to uniquely identify schemas. The subsection after next recommends some standard practices for generating such fingerprints. + +#### Transforming into Parsing Canonical Form +Assuming an input schema (in JSON form) that's already UTF-8 text for a _valid_ Avro schema (including all quotes as required by JSON), the following transformations will produce its Parsing Canonical Form: + +* [PRIMITIVES] Convert primitive schemas to their simple form (e.g., int instead of `{"type":"int"}`). +* [FULLNAMES] Replace short names with fullnames, using applicable namespaces to do so.
Then eliminate namespace attributes, which are now redundant. +* [STRIP] Keep only attributes that are relevant to parsing data, which are: _type_, _name_, _fields_, _symbols_, _items_, _values_, _size_. Strip all others (e.g., _doc_ and _aliases_). +* [ORDER] Order the appearance of fields of JSON objects as follows: _name_, _type_, _fields_, _symbols_, _items_, _values_, _size_. For example, if an object has _type_, _name_, and _size_ fields, then the _name_ field should appear first, followed by the _type_ and then the _size_ fields. +* [STRINGS] For all JSON string literals in the schema text, replace any escaped characters (e.g., \uXXXX escapes) with their UTF-8 equivalents. +* [INTEGERS] Eliminate quotes around and any leading zeros in front of JSON integer literals (which appear in the _size_ attributes of _fixed_ schemas). +* [WHITESPACE] Eliminate all whitespace in JSON outside of string literals. + +#### Schema Fingerprints {#schema-fingerprints} +"[A] fingerprinting algorithm is a procedure that maps an arbitrarily large data item (such as a computer file) to a much shorter bit string, its fingerprint, that uniquely identifies the original data for all practical purposes" (quoted from [Wikipedia](https://en.wikipedia.org/wiki/Fingerprint_(computing))). In the Avro context, fingerprints of Parsing Canonical Form can be useful in a number of applications; for example, to cache encoder and decoder objects, to tag data items with a short substitute for the writer's full schema, and to quickly negotiate common-case schemas between readers and writers. + +In designing fingerprinting algorithms, there is a fundamental trade-off between the length of the fingerprint and the probability of collisions. 
To help application designers find appropriate points within this trade-off space, while encouraging interoperability and ease of implementation, we recommend using one of the following three algorithms when fingerprinting Avro schemas: + +* When applications can tolerate longer fingerprints, we recommend using the [SHA-256 digest algorithm](https://en.wikipedia.org/wiki/SHA-2) to generate 256-bit fingerprints of Parsing Canonical Forms. Most languages today have SHA-256 implementations in their libraries. +* At the opposite extreme, the smallest fingerprint we recommend is a 64-bit [Rabin fingerprint](https://en.wikipedia.org/wiki/Rabin_fingerprint). Below, we provide pseudo-code for this algorithm that can be easily translated into any programming language. 64-bit fingerprints should guarantee uniqueness for schema caches of up to a million entries (for such a cache, the chance of a collision is 3E-8). We don't recommend shorter fingerprints, as the chance of a collision is too great (for example, with 32-bit fingerprints, a cache with as few as 100,000 schemas has a 50% chance of having a collision). +* Between these two extremes, we recommend using the [MD5 message digest](https://en.wikipedia.org/wiki/MD5) to generate 128-bit fingerprints. These make sense only where very large numbers of schemas are being manipulated (tens of millions); otherwise, 64-bit fingerprints should be sufficient. As with SHA-256, MD5 implementations are found in most libraries today. + +These fingerprints are not meant to provide any security guarantees, even the longer SHA-256-based ones. Most Avro applications should be surrounded by security measures that prevent attackers from writing random data and otherwise interfering with the consumers of schemas. We recommend that these surrounding mechanisms be used to prevent collision and pre-image attacks (i.e., "forgery") on schema fingerprints, rather than relying on the security properties of the fingerprints themselves. 
+ +Rabin fingerprints are [cyclic redundancy checks](https://en.wikipedia.org/wiki/Cyclic_redundancy_check) computed using irreducible polynomials. In the style of the Appendix of [RFC 1952](https://www.ietf.org/rfc/rfc1952.txt) (pg 10), which defines the CRC-32 algorithm, here's our definition of the 64-bit AVRO fingerprinting algorithm: +```java +long fingerprint64(byte[] buf) { + if (FP_TABLE == null) initFPTable(); + long fp = EMPTY; + for (int i = 0; i < buf.length; i++) + fp = (fp >>> 8) ^ FP_TABLE[(int)(fp ^ buf[i]) & 0xff]; + return fp; +} + +static long EMPTY = 0xc15d213aa4d7a795L; +static long[] FP_TABLE = null; + +void initFPTable() { + FP_TABLE = new long[256]; + for (int i = 0; i < 256; i++) { + long fp = i; + for (int j = 0; j < 8; j++) + fp = (fp >>> 1) ^ (EMPTY & -(fp & 1L)); + FP_TABLE[i] = fp; + } +} +``` + +Readers interested in the mathematics behind this algorithm may want to read [Chapter 14 of the Second Edition of Hacker's Delight](https://books.google.com/books?id=XD9iAwAAQBAJ&pg=PA319). (Unlike RFC-1952 and the book chapter, we prepend a single one bit to messages. We do this because CRCs ignore leading zero bits, which can be problematic. Our code prepends a one-bit by initializing fingerprints using EMPTY, rather than initializing using zero as in RFC-1952 and the book chapter.) + +## Logical Types +A logical type is an Avro primitive or complex type with extra attributes to represent a derived type. The attribute `logicalType` must always be present for a logical type, and is a string with the name of one of the logical types listed later in this section. Other attributes may be defined for particular logical types. + +A logical type is always serialized using its underlying Avro type so that values are encoded in exactly the same way as the equivalent Avro type that does not have a `logicalType` attribute. Language implementations may choose to represent logical types with an appropriate native type, although this is not required. 
+ +Language implementations must ignore unknown logical types when reading, and should use the underlying Avro type. If a logical type is invalid, for example a decimal with scale greater than its precision, then implementations should ignore the logical type and use the underlying Avro type. + +### Decimal +The `decimal` logical type represents an arbitrary-precision signed decimal number of the form $\mathit{unscaled} \times 10^{-\mathit{scale}}$. + +A `decimal` logical type annotates Avro _bytes_ or _fixed_ types. The byte array must contain the two's-complement representation of the unscaled integer value in big-endian byte order. The scale is fixed, and is specified using an attribute. + +The following attributes are supported: + +* _scale_, a JSON integer representing the scale (optional). If not specified the scale is 0. +* _precision_, a JSON integer representing the (maximum) precision of decimals stored in this type (required). +For example, the following schema represents decimal numbers with a maximum precision of 4 and a scale of 2: +```json +{ + "type": "bytes", + "logicalType": "decimal", + "precision": 4, + "scale": 2 +} +``` +Precision must be a positive integer greater than zero. If the underlying type is a _fixed_, then the precision is limited by its size. An array of length n can store at most $\lfloor \log_{10}(2^{8 \times n - 1} - 1) \rfloor$ base-10 digits of precision. + +Scale must be zero or a positive integer less than or equal to the precision. + +For the purposes of schema resolution, two schemas that are `decimal` logical types _match_ if their scales and precisions match. + +**alternative** + +As it's not always possible to fix scale and precision in advance for a decimal field, `big-decimal` is another `decimal` logical type restricted to Avro _bytes_. + +_Currently only available in Java and Rust_. 
+ +```json +{ + "type": "bytes", + "logicalType": "big-decimal" +} +``` +Here, as the scale property is stored in the value itself, it needs more bytes than the preceding `decimal` type, but it allows more flexibility. + +### UUID + +The `uuid` logical type represents a randomly generated universally unique identifier (UUID). + +A `uuid` logical type annotates an Avro `string` or `fixed` of length 16. Both the string and `fixed` byte layout have to conform with [RFC-4122](https://www.ietf.org/rfc/rfc4122.txt). + +The following schemas represent a uuid: + +```json +{ + "type": "string", + "logicalType": "uuid" +} +``` + +```json +{ + "type": "fixed", + "size": 16, + "logicalType": "uuid" +} +``` + +### Date +The `date` logical type represents a date within the calendar, with no reference to a particular time zone or time of day. + +A `date` logical type annotates an Avro `int`, where the int stores the number of days from the unix epoch, 1 January 1970 (ISO calendar). + +The following schema represents a date: +```json +{ + "type": "int", + "logicalType": "date" +} +``` + +### Time (millisecond precision) {#time_ms} +The `time-millis` logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one millisecond. + +A `time-millis` logical type annotates an Avro `int`, where the int stores the number of milliseconds after midnight, 00:00:00.000. + +### Time (microsecond precision) +The `time-micros` logical type represents a time of day, with no reference to a particular calendar, time zone or date, with a precision of one microsecond. + +A `time-micros` logical type annotates an Avro `long`, where the long stores the number of microseconds after midnight, 00:00:00.000000. + +### Timestamps {#timestamps} + +The `timestamp-{millis,micros,nanos}` logical type represents an instant on the global timeline, independent of a particular time zone or calendar. 
Upon reading a value back, we can only reconstruct the instant, but not the original representation. In practice, such timestamps are typically displayed to users in their local time zones, therefore they may be displayed differently depending on the execution environment. + +- `timestamp-millis`: logical type annotates an Avro `long`, where the long stores the number of milliseconds from the unix epoch, 1 January 1970 00:00:00.000. +- `timestamp-micros`: logical type annotates an Avro `long`, where the long stores the number of microseconds from the unix epoch, 1 January 1970 00:00:00.000000. +- `timestamp-nanos`: logical type annotates an Avro `long`, where the long stores the number of nanoseconds from the unix epoch, 1 January 1970 00:00:00.000000000. + +Example: Given an event at noon local time (12:00) on January 1, 2000, in Helsinki where the local time was two hours east of UTC (UTC+2). The timestamp is first shifted to UTC 2000-01-01T10:00:00 and that is then converted to Avro long 946720800000 (milliseconds) and written. + +### Local Timestamps {#local_timestamp} + +The `local-timestamp-{millis,micros,nanos}` logical type represents a timestamp in a local timezone, regardless of what specific time zone is considered local. + +- `local-timestamp-millis`: logical type annotates an Avro `long`, where the long stores the number of milliseconds, from 1 January 1970 00:00:00.000. +- `local-timestamp-micros`: logical type annotates an Avro `long`, where the long stores the number of microseconds, from 1 January 1970 00:00:00.000000. +- `local-timestamp-nanos`: logical type annotates an Avro `long`, where the long stores the number of nanoseconds, from 1 January 1970 00:00:00.000000000. + +Example: Given an event at noon local time (12:00) on January 1, 2000, in Helsinki where the local time was two hours east of UTC (UTC+2). The timestamp is converted to Avro long 946728000000 (milliseconds) and then written. 
+ +### Duration +The `duration` logical type represents an amount of time defined by a number of months, days and milliseconds. This is not equivalent to a number of milliseconds, because, depending on the moment in time from which the duration is measured, the number of days in the month and number of milliseconds in a day may differ. Other standard periods such as years, quarters, hours and minutes can be expressed through these basic periods. + +A `duration` logical type annotates Avro `fixed` type of size 12, which stores three little-endian unsigned integers that represent durations at different granularities of time. The first stores a number in months, the second stores a number in days, and the third stores a number in milliseconds. diff --git a/doc/content/en/docs/1.12.0/_index.md b/doc/content/en/docs/1.12.0/_index.md new file mode 100644 index 00000000000..1139a1e011b --- /dev/null +++ b/doc/content/en/docs/1.12.0/_index.md @@ -0,0 +1,59 @@ +--- +title: "Apache Avro™ 1.12.0 Documentation" +linkTitle: "1.12.0" +type: docs +weight: 10 +--- + + + +## Introduction + +Apache Avro™ is a data serialization system. + +Avro provides: + +* Rich data structures. +* A compact, fast, binary data format. +* A container file, to store persistent data. +* Remote procedure call (RPC). +* Simple integration with dynamic languages. Code generation is not required to read or write data files nor to use or implement RPC protocols. Code generation is an optional optimization, only worth implementing for statically typed languages. + +## Schemas + +Avro relies on schemas. When Avro data is read, the schema used when writing it is always present. This permits each datum to be written with no per-value overheads, making serialization both fast and small. This also facilitates use with dynamic, scripting languages, since data, together with its schema, is fully self-describing. 
+ +When Avro data is stored in a file, its schema is stored with it, so that files may be processed later by any program. If the program reading the data expects a different schema this can be easily resolved, since both schemas are present. + +When Avro is used in RPC, the client and server exchange schemas in the connection handshake. (This can be optimized so that, for most calls, no schemas are actually transmitted.) Since client and server both have the other's full schema, correspondence between same named fields, missing fields, extra fields, etc. can all be easily resolved. + +Avro schemas are defined with JSON. This facilitates implementation in languages that already have JSON libraries. + +## Comparison with other systems + +Avro provides functionality similar to systems such as [Thrift](https://thrift.apache.org/), [Protocol Buffers](https://code.google.com/p/protobuf/), etc. Avro differs from these systems in the following fundamental aspects. + +* Dynamic typing: Avro does not require that code be generated. Data is always accompanied by a schema that permits full processing of that data without code generation, static datatypes, etc. This facilitates construction of generic data-processing systems and languages. +* Untagged data: Since the schema is present when data is read, considerably less type information need be encoded with data, resulting in smaller serialization size. +* No manually-assigned field IDs: When a schema changes, both the old and new schema are always present when processing data, so differences may be resolved symbolically, using field names. + + diff --git a/doc/content/en/docs/1.12.0/api-c++.md b/doc/content/en/docs/1.12.0/api-c++.md new file mode 100644 index 00000000000..0ee54696c07 --- /dev/null +++ b/doc/content/en/docs/1.12.0/api-c++.md @@ -0,0 +1,29 @@ +--- +title: "C++ API" +linkTitle: "C++ API" +weight: 102 +manualLink: /docs/1.12.0/api/cpp/html/ +--- + + + +The C++ API documentation can be found here. 
diff --git a/doc/content/en/docs/1.12.0/api-c.md b/doc/content/en/docs/1.12.0/api-c.md new file mode 100644 index 00000000000..739f7758f58 --- /dev/null +++ b/doc/content/en/docs/1.12.0/api-c.md @@ -0,0 +1,29 @@ +--- +title: "C API" +linkTitle: "C API" +weight: 101 +manualLink: /docs/1.12.0/api/c/ +--- + + + +The C API documentation can be found here. diff --git a/doc/content/en/docs/1.12.0/api-csharp.md b/doc/content/en/docs/1.12.0/api-csharp.md new file mode 100644 index 00000000000..30e4eedb0ae --- /dev/null +++ b/doc/content/en/docs/1.12.0/api-csharp.md @@ -0,0 +1,29 @@ +--- +title: "C# API" +linkTitle: "C# API" +weight: 103 +manualLink: /docs/1.12.0/api/csharp/html/ +--- + + + +The C# API documentation can be found here. diff --git a/doc/content/en/docs/1.12.0/api-java.md b/doc/content/en/docs/1.12.0/api-java.md new file mode 100644 index 00000000000..e1478755095 --- /dev/null +++ b/doc/content/en/docs/1.12.0/api-java.md @@ -0,0 +1,29 @@ +--- +title: "Java API" +linkTitle: "Java API" +weight: 100 +manualLink: /docs/1.12.0/api/java/ +--- + + + +The Javadocs can be found here. diff --git a/doc/content/en/docs/1.12.0/api-py.md b/doc/content/en/docs/1.12.0/api-py.md new file mode 100644 index 00000000000..94f54950095 --- /dev/null +++ b/doc/content/en/docs/1.12.0/api-py.md @@ -0,0 +1,29 @@ +--- +title: "Python API" +linkTitle: "Python API" +weight: 104 +manualLink: /docs/1.12.0/api/py/html/ +--- + + + +The Python API documentation can be found here. diff --git a/doc/content/en/docs/1.12.0/trevni/css/maven-base.css b/doc/content/en/docs/1.12.0/trevni/css/maven-base.css new file mode 100644 index 00000000000..45dc441c914 --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/css/maven-base.css @@ -0,0 +1,168 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +body { + margin: 0px; + padding: 0px; +} +table { + padding:0px; + width: 100%; + margin-left: -2px; + margin-right: -2px; +} +acronym { + cursor: help; + border-bottom: 1px dotted #feb; +} +table.bodyTable th, table.bodyTable td { + padding: 2px 4px 2px 4px; + vertical-align: top; +} +div.clear { + clear:both; + visibility: hidden; +} +div.clear hr { + display: none; +} +#bannerLeft, #bannerRight { + font-size: xx-large; + font-weight: bold; +} +#bannerLeft img, #bannerRight img { + margin: 0px; +} +.xleft, #bannerLeft img { + float:left; +} +.xright, #bannerRight { + float:right; +} +#banner { + padding: 0px; +} +#breadcrumbs { + padding: 3px 10px 3px 10px; +} +#leftColumn { + width: 170px; + float:left; + overflow: auto; +} +#bodyColumn { + margin-right: 1.5em; + margin-left: 197px; +} +#legend { + padding: 8px 0 8px 0; +} +#navcolumn { + padding: 8px 4px 0 8px; +} +#navcolumn h5 { + margin: 0; + padding: 0; + font-size: small; +} +#navcolumn ul { + margin: 0; + padding: 0; + font-size: small; +} +#navcolumn li { + list-style-type: none; + background-image: none; + background-repeat: no-repeat; + background-position: 0 0.4em; + padding-left: 16px; + list-style-position: outside; + line-height: 1.2em; + font-size: smaller; +} +#navcolumn li.expanded { + background-image: url(../images/expanded.gif); +} +#navcolumn li.collapsed { + background-image: 
url(../images/collapsed.gif); +} +#navcolumn li.none { + text-indent: -1em; + margin-left: 1em; +} +#poweredBy { + text-align: center; +} +#navcolumn img { + margin-top: 10px; + margin-bottom: 3px; +} +#poweredBy img { + display:block; + margin: 20px 0 20px 17px; +} +#search img { + margin: 0px; + display: block; +} +#search #q, #search #btnG { + border: 1px solid #999; + margin-bottom:10px; +} +#search form { + margin: 0px; +} +#lastPublished { + font-size: x-small; +} +.navSection { + margin-bottom: 2px; + padding: 8px; +} +.navSectionHead { + font-weight: bold; + font-size: x-small; +} +.section { + padding: 4px; +} +#footer { + padding: 3px 10px 3px 10px; + font-size: x-small; +} +#breadcrumbs { + font-size: x-small; + margin: 0pt; +} +.source { + padding: 12px; + margin: 1em 7px 1em 7px; +} +.source pre { + margin: 0px; + padding: 0px; +} +#navcolumn img.imageLink, .imageLink { + padding-left: 0px; + padding-bottom: 0px; + padding-top: 0px; + padding-right: 2px; + border: 0px; + margin: 0px; +} diff --git a/doc/content/en/docs/1.12.0/trevni/css/maven-theme.css b/doc/content/en/docs/1.12.0/trevni/css/maven-theme.css new file mode 100644 index 00000000000..d3407e8ba8c --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/css/maven-theme.css @@ -0,0 +1,161 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. 
See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +body { + padding: 0px 0px 10px 0px; +} +body, td, select, input, li{ + font-family: Verdana, Helvetica, Arial, sans-serif; + font-size: 13px; +} +code{ + font-family: Courier, monospace; + font-size: 13px; +} +a { + text-decoration: none; +} +a:link { + color:#36a; +} +a:visited { + color:#47a; +} +a:active, a:hover { + color:#69c; +} +#legend li.externalLink { + background: url(../images/external.png) left top no-repeat; + padding-left: 18px; +} +a.externalLink, a.externalLink:link, a.externalLink:visited, a.externalLink:active, a.externalLink:hover { + background: url(../images/external.png) right center no-repeat; + padding-right: 18px; +} +#legend li.newWindow { + background: url(../images/newwindow.png) left top no-repeat; + padding-left: 18px; +} +a.newWindow, a.newWindow:link, a.newWindow:visited, a.newWindow:active, a.newWindow:hover { + background: url(../images/newwindow.png) right center no-repeat; + padding-right: 18px; +} +h2 { + padding: 4px 4px 4px 6px; + border: 1px solid #999; + color: #900; + background-color: #ddd; + font-weight:900; + font-size: x-large; +} +h3 { + padding: 4px 4px 4px 6px; + border: 1px solid #aaa; + color: #900; + background-color: #eee; + font-weight: normal; + font-size: large; +} +h4 { + padding: 4px 4px 4px 6px; + border: 1px solid #bbb; + color: #900; + background-color: #fff; + font-weight: normal; + font-size: large; +} +h5 { + padding: 4px 4px 4px 6px; + color: #900; + font-size: medium; +} +p { + line-height: 1.3em; + font-size: small; +} +#breadcrumbs { + border-top: 1px solid #aaa; + border-bottom: 1px solid #aaa; + background-color: #ccc; +} +#leftColumn { + margin: 10px 0 0 5px; + border: 1px solid #999; + background-color: #eee; + padding-bottom: 3px; /* IE-9 scrollbar-fix */ +} +#navcolumn h5 { + font-size: smaller; + border-bottom: 1px solid #aaaaaa; + padding-top: 2px; + color: #000; +} + 
+table.bodyTable th { + color: white; + background-color: #bbb; + text-align: left; + font-weight: bold; +} + +table.bodyTable th, table.bodyTable td { + font-size: 1em; +} + +table.bodyTable tr.a { + background-color: #ddd; +} + +table.bodyTable tr.b { + background-color: #eee; +} + +.source { + border: 1px solid #999; +} +dl { + padding: 4px 4px 4px 6px; + border: 1px solid #aaa; + background-color: #ffc; +} +dt { + color: #900; +} +#organizationLogo img, #projectLogo img, #projectLogo span{ + margin: 8px; +} +#banner { + border-bottom: 1px solid #fff; +} +.errormark, .warningmark, .donemark, .infomark { + background: url(../images/icon_error_sml.gif) no-repeat; +} + +.warningmark { + background-image: url(../images/icon_warning_sml.gif); +} + +.donemark { + background-image: url(../images/icon_success_sml.gif); +} + +.infomark { + background-image: url(../images/icon_info_sml.gif); +} + diff --git a/doc/content/en/docs/1.12.0/trevni/css/print.css b/doc/content/en/docs/1.12.0/trevni/css/print.css new file mode 100644 index 00000000000..18fcbad7083 --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/css/print.css @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +#banner, #footer, #leftcol, #breadcrumbs, .docs #toc, .docs .courtesylinks, #leftColumn, #navColumn { + display: none !important; +} +#bodyColumn, body.docs div.docs { + margin: 0 !important; + border: none !important +} diff --git a/doc/content/en/docs/1.12.0/trevni/css/site.css b/doc/content/en/docs/1.12.0/trevni/css/site.css new file mode 100644 index 00000000000..055e7e286ad --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/css/site.css @@ -0,0 +1 @@ +/* You can override this file with your own styles */ \ No newline at end of file diff --git a/doc/content/en/docs/1.12.0/trevni/dependencies.html b/doc/content/en/docs/1.12.0/trevni/dependencies.html new file mode 100644 index 00000000000..f5fb35c4e03 --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/dependencies.html @@ -0,0 +1,503 @@ + + + + + + + + Trevni Java – Project Dependencies + + + + + + + + + +
+
+
+

Project Dependencies

+

compile

+

The following is a list of compile dependencies for this project. These dependencies are required to compile and run the application:

+ + + + + + + + + + + + +
GroupIdArtifactIdVersionTypeLicenses
org.slf4jslf4j-api2.0.13jarMIT License
+

test

+

The following is a list of test dependencies for this project. These dependencies are only required to compile and run unit tests for the application:

+ + + + + + + + + + + + + + + + + + + + + + + + +
GroupIdArtifactIdVersionTypeLicenses
org.junit.jupiterjunit-jupiter5.10.3jarEclipse Public License v2.0
org.junit.vintagejunit-vintage-engine5.10.3jarEclipse Public License v2.0
org.slf4jslf4j-simple2.0.13jarMIT License
+

Project Transitive Dependencies

+

The following is a list of transitive dependencies for this project. Transitive dependencies are the dependencies of the project dependencies.

+

test

+

The following is a list of test dependencies for this project. These dependencies are only required to compile and run unit tests for the application:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
GroupIdArtifactIdVersionTypeLicenses
junitjunit4.13.2jarEclipse Public License 1.0
org.apiguardianapiguardian-api1.1.2jarThe Apache License, Version 2.0
org.hamcresthamcrest-core1.3jarNew BSD License
org.junit.jupiterjunit-jupiter-api5.10.3jarEclipse Public License v2.0
org.junit.jupiterjunit-jupiter-engine5.10.3jarEclipse Public License v2.0
org.junit.jupiterjunit-jupiter-params5.10.3jarEclipse Public License v2.0
org.junit.platformjunit-platform-commons1.10.3jarEclipse Public License v2.0
org.junit.platformjunit-platform-engine1.10.3jarEclipse Public License v2.0
org.opentest4jopentest4j1.3.0jarThe Apache License, Version 2.0
+

Project Dependency Graph

+

Dependency Tree

+
+

Licenses

+

The Apache License, Version 2.0: org.apiguardian:apiguardian-api, org.opentest4j:opentest4j

+

Eclipse Public License 1.0: JUnit

+

MIT License: SLF4J API Module, SLF4J Simple Provider

+

Apache-2.0: Trevni Java

+

Eclipse Public License v2.0: JUnit Jupiter (Aggregator), JUnit Jupiter API, JUnit Jupiter Engine, JUnit Jupiter Params, JUnit Platform Commons, JUnit Platform Engine API, JUnit Vintage Engine

+

New BSD License: Hamcrest Core

+

Dependency File Details

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
FilenameSizeEntriesClassesPackagesJava VersionDebug Information
junit-4.13.2.jar384.6 kB389350321.5Yes
apiguardian-api-1.1.2.jar6.8 kB9329Yes
hamcrest-core-1.3.jar45 kB524531.5Yes
junit-jupiter-5.10.3.jar6.4 kB5119No
junit-jupiter-api-5.10.3.jar211.4 kB19818389Yes
junit-jupiter-engine-5.10.3.jar244.7 kB14713099Yes
junit-jupiter-params-5.10.3.jar586 kB381347229Yes
junit-platform-commons-1.10.3.jar106.2 kB644479Yes
junit-platform-engine-1.10.3.jar204.8 kB153136109Yes
junit-vintage-engine-5.10.3.jar67.5 kB493569Yes
opentest4j-1.3.0.jar14.3 kB15929Yes
slf4j-api-2.0.13.jar68.6 kB705559Yes
slf4j-simple-2.0.13.jar15.7 kB22729Yes
TotalSizeEntriesClassesPackagesJava VersionDebug Information
132 MB15541345109912
compile: 1compile: 68.6 kBcompile: 70compile: 55compile: 59compile: 1
test: 12test: 1.9 MBtest: 1484test: 1290test: 1049test: 11
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/dependency-convergence.html b/doc/content/en/docs/1.12.0/trevni/dependency-convergence.html new file mode 100644 index 00000000000..3b5a826a7eb --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/dependency-convergence.html @@ -0,0 +1,339 @@ + + + + + + + + Trevni Java – Reactor Dependency Convergence + + + + + + + + + +
+
+
+

Reactor Dependency Convergence

+ + + +
+ Legend: +
[Error]At least one dependency has a differing version of the dependency or has SNAPSHOT dependencies.

+ + + + + + + + + + + + + + + + + + + + + +
+ Statistics: +
Number of modules:4
Number of dependencies (NOD):114
Number of unique artifacts (NOA):131
Number of version-conflicting artifacts (NOC):12
Number of SNAPSHOT artifacts (NOS):0
Convergence (NOD/NOA):[Error] 87 %
Ready for release (100% convergence and no SNAPSHOTS):[Error] Error
You do not have 100% convergence.
+

Dependencies used in modules

+

ch.qos.reload4j:reload4j

+ + + +
[Error] + + + + + + +
1.2.19 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- org.slf4j:slf4j-reload4j:jar:1.7.36:provided
             \- ch.qos.reload4j:reload4j:jar:1.2.19:provided

1.2.22 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       +- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
       |  +- ch.qos.reload4j:reload4j:jar:1.2.22:provided
       |  \- org.apache.hadoop:hadoop-auth:jar:3.3.6:provided
       |     \- ch.qos.reload4j:reload4j:jar:1.2.22:provided
       \- org.apache.hadoop:hadoop-mapreduce-client-core:jar:3.3.6:provided
          \- org.apache.hadoop:hadoop-yarn-common:jar:3.3.6:provided
             \- ch.qos.reload4j:reload4j:jar:1.2.22:provided

+

com.nimbusds:nimbus-jose-jwt

+ + + +
[Error] + + + + + + +
3.10 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- org.apache.hadoop:hadoop-auth:jar:3.3.6:provided
             \- org.apache.kerby:kerb-simplekdc:jar:1.0.1:provided
                \- org.apache.kerby:kerb-client:jar:1.0.1:provided
                   \- org.apache.kerby:token-provider:jar:1.0.1:provided
                      \- com.nimbusds:nimbus-jose-jwt:jar:3.10:provided

9.8.1 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- org.apache.hadoop:hadoop-auth:jar:3.3.6:provided
             \- com.nimbusds:nimbus-jose-jwt:jar:9.8.1:provided

+

commons-codec:commons-codec

+ + + +
[Error] + + + + + + + + + +
1.11 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- org.apache.httpcomponents:httpclient:jar:4.5.13:provided
             \- commons-codec:commons-codec:jar:1.11:provided

1.15 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       +- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
       |  +- commons-codec:commons-codec:jar:1.15:provided
       |  \- org.apache.hadoop:hadoop-auth:jar:3.3.6:provided
       |     \- commons-codec:commons-codec:jar:1.15:provided
       \- org.apache.hadoop:hadoop-mapreduce-client-core:jar:3.3.6:provided
          \- org.apache.hadoop:hadoop-yarn-common:jar:3.3.6:provided
             \- commons-codec:commons-codec:jar:1.15:provided

1.17.0 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.avro:trevni-core:jar:1.12.0:compile
       \- org.apache.commons:commons-compress:jar:1.26.2:compile
          \- commons-codec:commons-codec:jar:1.17.0:compile

+

commons-io:commons-io

+ + + +
[Error] + + + + + + + + + +
2.16.1 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.avro:trevni-core:jar:1.12.0:compile
       \- org.apache.commons:commons-compress:jar:1.26.2:compile
          \- commons-io:commons-io:jar:2.16.1:compile

2.5 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- org.apache.hadoop:hadoop-auth:jar:3.3.6:provided
             \- org.apache.kerby:kerb-simplekdc:jar:1.0.1:provided
                \- org.apache.kerby:kerb-client:jar:1.0.1:provided
                   \- org.apache.kerby:kerb-common:jar:1.0.1:provided
                      \- commons-io:commons-io:jar:2.5:provided

2.8.0 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       +- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
       |  \- commons-io:commons-io:jar:2.8.0:provided
       \- org.apache.hadoop:hadoop-mapreduce-client-core:jar:3.3.6:provided
          \- org.apache.hadoop:hadoop-yarn-common:jar:3.3.6:provided
             \- commons-io:commons-io:jar:2.8.0:provided

+

commons-logging:commons-logging

+ + + +
[Error] + + + + + + +
1.1.3 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- commons-logging:commons-logging:jar:1.1.3:provided

1.2 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          +- org.apache.httpcomponents:httpclient:jar:4.5.13:provided
          |  \- commons-logging:commons-logging:jar:1.2:provided
          +- commons-beanutils:commons-beanutils:jar:1.9.4:provided
          |  \- commons-logging:commons-logging:jar:1.2:provided
          \- org.apache.commons:commons-configuration2:jar:2.8.0:provided
             \- commons-logging:commons-logging:jar:1.2:provided

+

jakarta.activation:jakarta.activation-api

+ + + +
[Error] + + + + + + +
1.2.1 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- jakarta.activation:jakarta.activation-api:jar:1.2.1:provided

1.2.2 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-mapreduce-client-core:jar:3.3.6:provided
          \- org.apache.hadoop:hadoop-yarn-common:jar:3.3.6:provided
             \- com.fasterxml.jackson.module:jackson-module-jaxb-annotations:jar:2.17.2:provided
                +- jakarta.xml.bind:jakarta.xml.bind-api:jar:2.3.3:provided
                |  \- jakarta.activation:jakarta.activation-api:jar:1.2.2:provided
                \- jakarta.activation:jakarta.activation-api:jar:1.2.2:provided

+

org.apache.avro:avro

+ + + +
[Error] + + + + + + +
1.12.0 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    +- org.apache.avro:avro-mapred:jar:1.12.0:compile
    |  +- org.apache.avro:avro-ipc:jar:1.12.0:compile
    |  |  \- org.apache.avro:avro:jar:1.12.0:compile
    |  \- org.apache.avro:avro-ipc-jetty:jar:1.12.0:compile
    |     \- org.apache.avro:avro:jar:1.12.0:compile
    \- org.apache.avro:avro:jar:1.12.0:compile

1.7.7 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- org.apache.avro:avro:jar:1.7.7:provided

+

org.apache.commons:commons-text

+ + + +
[Error] + + + + + + +
1.10.0 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- org.apache.commons:commons-text:jar:1.10.0:provided

1.9 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- org.apache.commons:commons-configuration2:jar:2.8.0:provided
             \- org.apache.commons:commons-text:jar:1.9:provided

+

org.codehaus.woodstox:stax2-api

+ + + +
[Error] + + + + + + +
4.2 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- com.fasterxml.woodstox:woodstox-core:jar:5.4.0:provided
             \- org.codehaus.woodstox:stax2-api:jar:4.2:provided

4.2.1 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- org.codehaus.woodstox:stax2-api:jar:4.2.1:provided

+

org.eclipse.jetty:jetty-http

+ + + +
[Error] + + + + + + +
9.4.51.v20230217 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-yarn-client:jar:3.3.6:provided
          \- org.eclipse.jetty.websocket:websocket-client:jar:9.4.51.v20230217:provided
             \- org.eclipse.jetty:jetty-client:jar:9.4.51.v20230217:provided
                \- org.eclipse.jetty:jetty-http:jar:9.4.51.v20230217:provided

9.4.55.v20240627 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.avro:avro-mapred:jar:1.12.0:compile
       \- org.apache.avro:avro-ipc-jetty:jar:1.12.0:compile
          \- org.eclipse.jetty:jetty-server:jar:9.4.55.v20240627:compile
             \- org.eclipse.jetty:jetty-http:jar:9.4.55.v20240627:compile

+

org.eclipse.jetty:jetty-io

+ + + +
[Error] + + + + + + +
9.4.51.v20230217 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-yarn-client:jar:3.3.6:provided
          \- org.eclipse.jetty.websocket:websocket-client:jar:9.4.51.v20230217:provided
             +- org.eclipse.jetty:jetty-client:jar:9.4.51.v20230217:provided
             |  \- org.eclipse.jetty:jetty-io:jar:9.4.51.v20230217:provided
             +- org.eclipse.jetty:jetty-io:jar:9.4.51.v20230217:compile
             \- org.eclipse.jetty.websocket:websocket-common:jar:9.4.51.v20230217:provided
                \- org.eclipse.jetty:jetty-io:jar:9.4.51.v20230217:provided

9.4.55.v20240627 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.avro:avro-mapred:jar:1.12.0:compile
       \- org.apache.avro:avro-ipc-jetty:jar:1.12.0:compile
          \- org.eclipse.jetty:jetty-server:jar:9.4.55.v20240627:compile
             +- org.eclipse.jetty:jetty-http:jar:9.4.55.v20240627:compile
             |  \- org.eclipse.jetty:jetty-io:jar:9.4.55.v20240627:compile
             \- org.eclipse.jetty:jetty-io:jar:9.4.55.v20240627:compile

+

org.slf4j:slf4j-api

+ + + +
[Error] + + + + + + + + + + + + + + + +
1.7.22 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          \- io.dropwizard.metrics:metrics-core:jar:3.2.4:provided
             \- org.slf4j:slf4j-api:jar:1.7.22:provided

1.7.25 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       \- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
          +- org.apache.hadoop:hadoop-auth:jar:3.3.6:provided
          |  \- org.apache.kerby:kerb-simplekdc:jar:1.0.1:provided
          |     \- org.apache.kerby:kerb-client:jar:1.0.1:provided
          |        \- org.apache.kerby:kerby-config:jar:1.0.1:provided
          |           \- org.slf4j:slf4j-api:jar:1.7.25:provided
          \- org.apache.kerby:kerb-core:jar:1.0.1:provided
             \- org.apache.kerby:kerby-pkix:jar:1.0.1:provided
                \- org.slf4j:slf4j-api:jar:1.7.25:provided

1.7.30 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.avro:avro-mapred:jar:1.12.0:compile
       \- org.apache.avro:avro-ipc:jar:1.12.0:compile
          \- org.apache.velocity:velocity-engine-core:jar:2.3:compile
             \- org.slf4j:slf4j-api:jar:1.7.30:compile

1.7.36 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    \- org.apache.hadoop:hadoop-client:jar:3.3.6:provided
       +- org.apache.hadoop:hadoop-common:jar:3.3.6:provided
       |  +- org.slf4j:slf4j-api:jar:1.7.36:provided
       |  +- org.slf4j:slf4j-reload4j:jar:1.7.36:provided
       |  |  \- org.slf4j:slf4j-api:jar:1.7.36:provided
       |  \- org.apache.hadoop:hadoop-auth:jar:3.3.6:provided
       |     \- org.slf4j:slf4j-api:jar:1.7.36:provided
       +- org.apache.hadoop:hadoop-mapreduce-client-core:jar:3.3.6:provided
       |  +- org.apache.hadoop:hadoop-yarn-common:jar:3.3.6:provided
       |  |  \- org.slf4j:slf4j-api:jar:1.7.36:provided
       |  \- org.slf4j:slf4j-api:jar:1.7.36:provided
       \- org.apache.hadoop:hadoop-mapreduce-client-jobclient:jar:3.3.6:provided
          +- org.apache.hadoop:hadoop-mapreduce-client-common:jar:3.3.6:provided
          |  \- org.slf4j:slf4j-api:jar:1.7.36:provided
          \- org.slf4j:slf4j-api:jar:1.7.36:provided

2.0.13 +
    +
  1. org.apache.avro:trevni-avro:jar:1.12.0
    +- org.apache.avro:trevni-core:jar:1.12.0:compile
    |  \- org.slf4j:slf4j-api:jar:2.0.13:compile
    +- org.apache.avro:trevni-core:jar:tests:1.12.0:test
    |  \- org.slf4j:slf4j-api:jar:2.0.13:test
    +- org.apache.avro:avro-mapred:jar:1.12.0:compile
    |  +- org.apache.avro:avro-ipc:jar:1.12.0:compile
    |  |  \- org.slf4j:slf4j-api:jar:2.0.13:compile
    |  +- org.apache.avro:avro-ipc-jetty:jar:1.12.0:compile
    |  |  \- org.slf4j:slf4j-api:jar:2.0.13:compile
    |  \- org.slf4j:slf4j-api:jar:2.0.13:compile
    +- org.apache.avro:avro:jar:1.12.0:compile
    |  \- org.slf4j:slf4j-api:jar:2.0.13:compile
    +- org.slf4j:slf4j-api:jar:2.0.13:compile
    \- org.slf4j:slf4j-simple:jar:2.0.13:test
       \- org.slf4j:slf4j-api:jar:2.0.13:test

+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/dependency-info.html b/doc/content/en/docs/1.12.0/trevni/dependency-info.html new file mode 100644 index 00000000000..f43fbc7da7e --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/dependency-info.html @@ -0,0 +1,118 @@ + + + + + + + + Trevni Java – Dependency Information + + + + + + + + + +
+
+
+

Dependency Information

+

Apache Maven

+
+
<dependency>
+  <groupId>org.apache.avro</groupId>
+  <artifactId>trevni-java</artifactId>
+  <version>1.12.0</version>
+  <type>pom</type>
+</dependency>
+

Apache Ivy

+
+
<dependency org="org.apache.avro" name="trevni-java" rev="1.12.0">
+  <artifact name="trevni-java" type="pom" />
+</dependency>
+

Groovy Grape

+
+
@Grapes(
+@Grab(group='org.apache.avro', module='trevni-java', version='1.12.0')
+)
+

Gradle/Grails

+
+
implementation 'org.apache.avro:trevni-java:1.12.0'
+

Scala SBT

+
+
libraryDependencies += "org.apache.avro" % "trevni-java" % "1.12.0"
+

Leiningen

+
+
[org.apache.avro/trevni-java "1.12.0"]
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/dependency-management.html b/doc/content/en/docs/1.12.0/trevni/dependency-management.html new file mode 100644 index 00000000000..310a167e880 --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/dependency-management.html @@ -0,0 +1,613 @@ + + + + + + + + Trevni Java – Project Dependency Management + + + + + + + + + +
+
+
+

Project Dependency Management

+

compile

+

The following is a list of compile dependencies in the DependencyManagement of this project. These dependencies can be included in the submodules to compile and run the submodule:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
GroupId | ArtifactId | Version | Type | License
com.fasterxml.jackson.core | jackson-annotations | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.core | jackson-core | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.core | jackson-databind | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.dataformat | jackson-dataformat-avro | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.dataformat | jackson-dataformat-cbor | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.dataformat | jackson-dataformat-csv | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.dataformat | jackson-dataformat-ion | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.dataformat | jackson-dataformat-properties | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.dataformat | jackson-dataformat-protobuf | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.dataformat | jackson-dataformat-smile | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.dataformat | jackson-dataformat-toml | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.dataformat | jackson-dataformat-xml | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.dataformat | jackson-dataformat-yaml | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.datatype | jackson-datatype-eclipse-collections | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.datatype | jackson-datatype-guava | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.datatype | jackson-datatype-hibernate4 | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.datatype | jackson-datatype-hibernate5 | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.datatype | jackson-datatype-hibernate5-jakarta | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.datatype | jackson-datatype-hibernate6 | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.datatype | jackson-datatype-hppc | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.datatype | jackson-datatype-jakarta-jsonp | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.datatype | jackson-datatype-jaxrs | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.datatype | jackson-datatype-jdk8 | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.datatype | jackson-datatype-joda | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.datatype | jackson-datatype-joda-money | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.datatype | jackson-datatype-json-org | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.datatype | jackson-datatype-jsr310 | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.datatype | jackson-datatype-jsr353 | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.datatype | jackson-datatype-pcollections | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.jakarta.rs | jackson-jakarta-rs-base | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.jakarta.rs | jackson-jakarta-rs-cbor-provider | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.jakarta.rs | jackson-jakarta-rs-json-provider | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.jakarta.rs | jackson-jakarta-rs-smile-provider | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.jakarta.rs | jackson-jakarta-rs-xml-provider | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.jakarta.rs | jackson-jakarta-rs-yaml-provider | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.jaxrs | jackson-jaxrs-base | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.jaxrs | jackson-jaxrs-cbor-provider | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.jaxrs | jackson-jaxrs-json-provider | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.jaxrs | jackson-jaxrs-smile-provider | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.jaxrs | jackson-jaxrs-xml-provider | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.jaxrs | jackson-jaxrs-yaml-provider | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.jr | jackson-jr-all | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.jr | jackson-jr-annotation-support | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.jr | jackson-jr-extension-javatime | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.jr | jackson-jr-objects | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.jr | jackson-jr-retrofit2 | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.jr | jackson-jr-stree | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.module | jackson-module-afterburner | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.module | jackson-module-android-record | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.module | jackson-module-blackbird | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.module | jackson-module-guice | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.module | jackson-module-guice7 | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.module | jackson-module-jakarta-xmlbind-annotations | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.module | jackson-module-jaxb-annotations | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.module | jackson-module-jsonSchema | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.module | jackson-module-jsonSchema-jakarta | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.module | jackson-module-kotlin | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.module | jackson-module-mrbean | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.module | jackson-module-no-ctor-deser | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.module | jackson-module-osgi | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.module | jackson-module-parameter-names | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.module | jackson-module-paranamer | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.module | jackson-module-scala_2.11 | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.module | jackson-module-scala_2.12 | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.module | jackson-module-scala_2.13 | 2.17.2 | jar | The Apache Software License, Version 2.0
com.fasterxml.jackson.module | jackson-module-scala_3 | 2.17.2 | jar | The Apache Software License, Version 2.0
com.github.luben | zstd-jni | 1.5.6-4 | jar | BSD 2-Clause License
io.grpc | grpc-core | 1.65.1 | jar | Apache 2.0
io.grpc | grpc-netty | 1.65.1 | jar | Apache 2.0
io.grpc | grpc-stub | 1.65.1 | jar | Apache 2.0
javax.servlet | javax.servlet-api | 4.0.1 | jar | CDDL + GPLv2 with classpath exception
net.sf.jopt-simple | jopt-simple | 5.0.4 | jar | The MIT License
org.apache.commons | commons-compress | 1.26.2 | jar | Apache-2.0
org.apache.commons | commons-lang3 | 3.15.0 | jar | Apache-2.0
org.apache.hadoop | hadoop-client | 3.3.6 | jar | Apache License, Version 2.0
org.apache.maven.plugin-tools | maven-plugin-annotations | 3.10.2 | jar | Apache-2.0
org.apache.velocity | velocity-engine-core | 2.3 | jar | Apache License, Version 2.0
org.eclipse.jetty | jetty-server | 9.4.55.v20240627 | jar | Apache Software License - Version 2.0, Eclipse Public License - Version 1.0
org.eclipse.jetty | jetty-servlet | 9.4.55.v20240627 | jar | Apache Software License - Version 2.0, Eclipse Public License - Version 1.0
org.eclipse.jetty | jetty-util | 9.4.55.v20240627 | jar | Apache Software License - Version 2.0, Eclipse Public License - Version 1.0
org.tukaani | xz | 1.9 | jar | Public Domain
org.xerial.snappy | snappy-java | 1.1.10.5 | jar | Apache-2.0
+

test

+

The following is a list of test dependencies in the DependencyManagement of this project. These dependencies can be included in the submodules to compile and run unit tests for the submodule:

+ + + + + + + + + + + + + + + + + + +
GroupId | ArtifactId | Version | Type | License
org.hamcrest | hamcrest-library | 2.2 | jar | BSD License 3
org.mockito | mockito-core | 5.12.0 | jar | MIT
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/distribution-management.html b/doc/content/en/docs/1.12.0/trevni/distribution-management.html new file mode 100644 index 00000000000..6301bd5c270 --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/distribution-management.html @@ -0,0 +1,94 @@ + + + + + + + + Trevni Java – Project Distribution Management + + + + + + + + + +
+
+
+

Overview

+

The following is the distribution management information used by this project.

+

Repository - apache.releases.https

https://repository.apache.org/service/local/staging/deploy/maven2
+

Snapshot Repository - apache.snapshots.https

https://repository.apache.org/content/repositories/snapshots
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/images/close.gif b/doc/content/en/docs/1.12.0/trevni/images/close.gif new file mode 100644 index 00000000000..1c26bbc5264 Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/close.gif differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/collapsed.gif b/doc/content/en/docs/1.12.0/trevni/images/collapsed.gif new file mode 100644 index 00000000000..6e710840640 Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/collapsed.gif differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/expanded.gif b/doc/content/en/docs/1.12.0/trevni/images/expanded.gif new file mode 100644 index 00000000000..0fef3d89e0d Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/expanded.gif differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/external.png b/doc/content/en/docs/1.12.0/trevni/images/external.png new file mode 100644 index 00000000000..3f999fc88b3 Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/external.png differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/icon_error_sml.gif b/doc/content/en/docs/1.12.0/trevni/images/icon_error_sml.gif new file mode 100644 index 00000000000..61132ef2b01 Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/icon_error_sml.gif differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/icon_info_sml.gif b/doc/content/en/docs/1.12.0/trevni/images/icon_info_sml.gif new file mode 100644 index 00000000000..c6cb9ad7ce4 Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/icon_info_sml.gif differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/icon_success_sml.gif b/doc/content/en/docs/1.12.0/trevni/images/icon_success_sml.gif new file mode 100644 index 00000000000..52e85a430af Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/icon_success_sml.gif differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/icon_warning_sml.gif 
b/doc/content/en/docs/1.12.0/trevni/images/icon_warning_sml.gif new file mode 100644 index 00000000000..873bbb52cb9 Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/icon_warning_sml.gif differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/logos/build-by-maven-black.png b/doc/content/en/docs/1.12.0/trevni/images/logos/build-by-maven-black.png new file mode 100644 index 00000000000..919fd0f66a7 Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/logos/build-by-maven-black.png differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/logos/build-by-maven-white.png b/doc/content/en/docs/1.12.0/trevni/images/logos/build-by-maven-white.png new file mode 100644 index 00000000000..7d44c9c2e57 Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/logos/build-by-maven-white.png differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/logos/maven-feather.png b/doc/content/en/docs/1.12.0/trevni/images/logos/maven-feather.png new file mode 100644 index 00000000000..b5ada836e9e Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/logos/maven-feather.png differ diff --git a/doc/content/en/docs/1.12.0/trevni/images/newwindow.png b/doc/content/en/docs/1.12.0/trevni/images/newwindow.png new file mode 100644 index 00000000000..6287f72bd08 Binary files /dev/null and b/doc/content/en/docs/1.12.0/trevni/images/newwindow.png differ diff --git a/doc/content/en/docs/1.12.0/trevni/index.html b/doc/content/en/docs/1.12.0/trevni/index.html new file mode 100644 index 00000000000..b28fd9cfdce --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/index.html @@ -0,0 +1,107 @@ + + + + + + + + Trevni Java – About + + + + + + + + + +
+
+
+

About Trevni Java

+

Trevni Java

+

Project Modules

+

This project has declared the following modules:

+ + + + + + + + + + + + +
Name | Description
Trevni Java Core | Trevni Java Core
Trevni Java Avro | Trevni Java Avro
Trevni Specification | Trevni Java
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/issue-management.html b/doc/content/en/docs/1.12.0/trevni/issue-management.html new file mode 100644 index 00000000000..fa89c231276 --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/issue-management.html @@ -0,0 +1,96 @@ + + + + + + + + Trevni Java – Issue Management + + + + + + + + + +
+
+
+

Overview

+

This project uses JIRA.

+

Issue Management

+

Issues, bugs, and feature requests should be submitted to the following issue management system for this project.

+
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/licenses.html b/doc/content/en/docs/1.12.0/trevni/licenses.html new file mode 100644 index 00000000000..13153347619 --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/licenses.html @@ -0,0 +1,298 @@ + + + + + + + + Trevni Java – Project Licenses + + + + + + + + + +
+
+
+

Overview

+

Typically the licenses listed for the project are those of the project itself, and not of dependencies.

+

Project Licenses

+

Apache-2.0

+
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/mailing-lists.html b/doc/content/en/docs/1.12.0/trevni/mailing-lists.html new file mode 100644 index 00000000000..3ad9d4a9e2b --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/mailing-lists.html @@ -0,0 +1,117 @@ + + + + + + + + Trevni Java – Project Mailing Lists + + + + + + + + + +
+
+
+

Project Mailing Lists

+

These are the mailing lists that have been established for this project. For each list, there is a subscribe, unsubscribe, and an archive link.

+ + + + + + + + + + + + + + + + + + + + + + + + +
NameSubscribeUnsubscribePostArchive
Avro Developer ListSubscribeUnsubscribePostmail-archives.apache.org
Avro Users ListSubscribeUnsubscribePostmail-archives.apache.org
Avro Commits ListSubscribeUnsubscribePostmail-archives.apache.org
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/modules.html b/doc/content/en/docs/1.12.0/trevni/modules.html new file mode 100644 index 00000000000..aa7181309ea --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/modules.html @@ -0,0 +1,105 @@ + + + + + + + + Trevni Java – Project Modules + + + + + + + + + +
+
+
+

Project Modules

+

This project has declared the following modules:

+ + + + + + + + + + + + +
NameDescription
Trevni Java CoreTrevni Java Core
Trevni Java AvroTrevni Java Avro
Trevni SpecificationTrevni Java
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/plugin-management.html b/doc/content/en/docs/1.12.0/trevni/plugin-management.html new file mode 100644 index 00000000000..79b358bc983 --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/plugin-management.html @@ -0,0 +1,244 @@ + + + + + + + + Trevni Java – Project Plugin Management + + + + + + + + + +
+
+
+

Project Plugin Management

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
GroupIdArtifactIdVersion
com.diffplug.spotlessspotless-maven-plugin2.43.0
org.apache.maven.pluginsmaven-antrun-plugin3.1.0
org.apache.maven.pluginsmaven-assembly-plugin3.6.0
org.apache.maven.pluginsmaven-checkstyle-plugin3.4.0
org.apache.maven.pluginsmaven-clean-plugin3.3.2
org.apache.maven.pluginsmaven-compiler-plugin3.13.0
org.apache.maven.pluginsmaven-dependency-plugin3.6.1
org.apache.maven.pluginsmaven-deploy-plugin3.1.1
org.apache.maven.pluginsmaven-ear-plugin3.3.0
org.apache.maven.pluginsmaven-enforcer-plugin3.5.0
org.apache.maven.pluginsmaven-failsafe-plugin3.2.2
org.apache.maven.pluginsmaven-gpg-plugin3.2.4
org.apache.maven.pluginsmaven-help-plugin3.4.0
org.apache.maven.pluginsmaven-install-plugin3.1.1
org.apache.maven.pluginsmaven-invoker-plugin3.6.0
org.apache.maven.pluginsmaven-jar-plugin3.3.0
org.apache.maven.pluginsmaven-javadoc-plugin3.8.0
org.apache.maven.pluginsmaven-plugin-plugin3.13.1
org.apache.maven.pluginsmaven-plugin-report-plugin3.10.2
org.apache.maven.pluginsmaven-project-info-reports-plugin3.4.5
org.apache.maven.pluginsmaven-release-plugin3.0.1
org.apache.maven.pluginsmaven-remote-resources-plugin3.2.0
org.apache.maven.pluginsmaven-resources-plugin3.3.1
org.apache.maven.pluginsmaven-scm-plugin2.0.1
org.apache.maven.pluginsmaven-scm-publish-plugin3.2.1
org.apache.maven.pluginsmaven-shade-plugin3.6.0
org.apache.maven.pluginsmaven-site-plugin3.12.1
org.apache.maven.pluginsmaven-source-plugin3.3.1
org.apache.maven.pluginsmaven-surefire-plugin3.3.1
org.apache.maven.pluginsmaven-surefire-report-plugin3.2.2
org.apache.maven.pluginsmaven-toolchains-plugin3.2.0
org.apache.maven.pluginsmaven-war-plugin3.4.0
org.apache.ratapache-rat-plugin0.16.1
org.codehaus.mojobuild-helper-maven-plugin3.6.0
org.codehaus.mojoexec-maven-plugin3.3.0
org.cyclonedxcyclonedx-maven-plugin2.8.0
org.javacc.pluginjavacc-maven-plugin3.0.3
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/plugins.html b/doc/content/en/docs/1.12.0/trevni/plugins.html new file mode 100644 index 00000000000..7dfb97d6313 --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/plugins.html @@ -0,0 +1,158 @@ + + + + + + + + Trevni Java – Project Plugins + + + + + + + + + +
+
+
+

Project Build Plugins

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
GroupIdArtifactIdVersion
com.diffplug.spotlessspotless-maven-plugin2.43.0
org.apache.felixmaven-bundle-plugin5.1.9
org.apache.maven.pluginsmaven-checkstyle-plugin3.4.0
org.apache.maven.pluginsmaven-clean-plugin3.3.2
org.apache.maven.pluginsmaven-deploy-plugin3.1.1
org.apache.maven.pluginsmaven-enforcer-plugin3.5.0
org.apache.maven.pluginsmaven-install-plugin3.1.1
org.apache.maven.pluginsmaven-plugin-plugin3.13.1
org.apache.maven.pluginsmaven-remote-resources-plugin3.2.0
org.apache.maven.pluginsmaven-site-plugin3.12.1
org.apache.maven.pluginsmaven-toolchains-plugin3.2.0
org.apache.ratapache-rat-plugin0.16.1
org.cyclonedxcyclonedx-maven-plugin2.8.0
+

Project Report Plugins

+ + + + + + + + +
GroupIdArtifactIdVersion
org.apache.maven.pluginsmaven-project-info-reports-plugin3.4.5
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/project-info.html b/doc/content/en/docs/1.12.0/trevni/project-info.html new file mode 100644 index 00000000000..df0c1e3ebf8 --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/project-info.html @@ -0,0 +1,139 @@ + + + + + + + + Trevni Java – Project Information + + + + + + + + + +
+
+
+

Project Information

+

This document provides an overview of the various documents and links that are part of this project's general information. All of this content is automatically generated by Maven on behalf of the project.

+

Overview

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
DocumentDescription
DependenciesThis document lists the project's dependencies and provides information on each dependency.
Dependency ConvergenceThis document presents the convergence of dependency versions across the entire project, and its sub modules.
Dependency InformationThis document describes how to include this project as a dependency using various dependency management tools.
Dependency ManagementThis document lists the dependencies that are defined through dependencyManagement.
Distribution ManagementThis document provides information on the distribution management of this project.
AboutTrevni Java
Issue ManagementThis document provides information on the issue management system used in this project.
LicensesThis document lists the project license(s).
Mailing ListsThis document provides subscription and archive information for this project's mailing lists.
Project ModulesThis document lists the modules (sub-projects) of this project.
Plugin ManagementThis document lists the plugins that are defined through pluginManagement.
PluginsThis document lists the build plugins and the report plugins used by this project.
Source Code ManagementThis document lists ways to access the online source repository.
SummaryThis document lists other related information about this project.
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/scm.html b/doc/content/en/docs/1.12.0/trevni/scm.html new file mode 100644 index 00000000000..c9117f1f2dc --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/scm.html @@ -0,0 +1,106 @@ + + + + + + + + Trevni Java – Source Code Management + + + + + + + + + +
+
+
+

Overview

+

This project uses Git to manage its source code. Instructions on Git use can be found at https://git-scm.com/documentation.

+

Web Browser Access

+

The following is a link to a browsable version of the source repository:

+
+

Anonymous Access

+

The source can be checked out anonymously from Git with this command (See https://git-scm.com/docs/git-clone):

+
+
$ git clone https://github.com/apache/avro.git
+

Developer Access

+

Only project developers can access the Git tree via this method (See https://git-scm.com/docs/git-clone).

+
+
$ git clone https://github.com/apache/avro.git
+

Access from Behind a Firewall

+

Refer to the documentation of the SCM used for more information about access behind a firewall.

+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/1.12.0/trevni/summary.html b/doc/content/en/docs/1.12.0/trevni/summary.html new file mode 100644 index 00000000000..ee8ac2578cd --- /dev/null +++ b/doc/content/en/docs/1.12.0/trevni/summary.html @@ -0,0 +1,133 @@ + + + + + + + + Trevni Java – Project Summary + + + + + + + + + +
+
+
+

Project Summary

+

Project Information

+ + + + + + + + + + + + +
FieldValue
NameTrevni Java
DescriptionTrevni Java
Homepagehttps://avro.apache.org/
+

Project Organization

+ + + + + + + + + +
FieldValue
NameThe Apache Software Foundation
URLhttps://www.apache.org/
+

Build Information

+ + + + + + + + + + + + + + + +
FieldValue
GroupIdorg.apache.avro
ArtifactIdtrevni-java
Version1.12.0
Typepom
+
+
+
+
+
+ + + diff --git a/doc/content/en/docs/_index.md b/doc/content/en/docs/_index.md index 541db8d6b76..ac7cba47bb1 100755 --- a/doc/content/en/docs/_index.md +++ b/doc/content/en/docs/_index.md @@ -54,5 +54,3 @@ Avro provides functionality similar to systems such as [Thrift](https://thrift.a * Dynamic typing: Avro does not require that code be generated. Data is always accompanied by a schema that permits full processing of that data without code generation, static datatypes, etc. This facilitates construction of generic data-processing systems and languages. * Untagged data: Since the schema is present when data is read, considerably less type information need be encoded with data, resulting in smaller serialization size. * No manually-assigned field IDs: When a schema changes, both the old and new schema are always present when processing data, so differences may be resolved symbolically, using field names. - - diff --git a/doc/docker-compose.yaml b/doc/docker-compose.yaml deleted file mode 100644 index 833d8839a0b..00000000000 --- a/doc/docker-compose.yaml +++ /dev/null @@ -1,32 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# - -version: "3.3" - -services: - - site: - image: docsy/docsy-example - build: - context: . 
- command: server - ports: - - "1313:1313" - volumes: - - .:/src diff --git a/doc/go.mod b/doc/go.mod new file mode 100644 index 00000000000..6a69a014b4c --- /dev/null +++ b/doc/go.mod @@ -0,0 +1,26 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +module github.com/apache/avro + +go 1.22.6 + +require ( + github.com/FortAwesome/Font-Awesome v0.0.0-20240402185447-c0f460dca7f7 // indirect + github.com/google/docsy v0.10.0 // indirect + github.com/twbs/bootstrap v5.3.3+incompatible // indirect +) diff --git a/doc/go.sum b/doc/go.sum new file mode 100644 index 00000000000..69719376425 --- /dev/null +++ b/doc/go.sum @@ -0,0 +1,6 @@ +github.com/FortAwesome/Font-Awesome v0.0.0-20240402185447-c0f460dca7f7 h1:2aWEKCRLqQ9nPyXaz4/IYtRrDr3PzEiX0DUSUr2/EDs= +github.com/FortAwesome/Font-Awesome v0.0.0-20240402185447-c0f460dca7f7/go.mod h1:IUgezN/MFpCDIlFezw3L8j83oeiIuYoj28Miwr/KUYo= +github.com/google/docsy v0.10.0 h1:6tMDacPwAyRWNCfvsn/9qGOZDQ8b0aRzjRZvnZPY5dg= +github.com/google/docsy v0.10.0/go.mod h1:c0nIAqmRTOuJ01F85U/wJPQtc3Zj9N58Kea9bOT2AJc= +github.com/twbs/bootstrap v5.3.3+incompatible h1:goFoqinzdHfkeegpFP7pvhbd0g+A3O2hbU3XCjuNrEQ= +github.com/twbs/bootstrap v5.3.3+incompatible/go.mod h1:fZTSrkpSf0/HkL0IIJzvVspTt1r9zuf7XlZau8kpcY0= diff --git a/doc/layouts/partials/navbar-asf-links.html b/doc/layouts/partials/navbar-asf-links.html index 54e3b8dcf08..e85b4b9275e 100644 --- a/doc/layouts/partials/navbar-asf-links.html +++ b/doc/layouts/partials/navbar-asf-links.html @@ -19,11 +19,13 @@ --> - -