From 7577d1e024d4ecff40a6877222317ca555c7dae5 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 15:15:30 +0000 Subject: [PATCH 01/86] Add jvector-4.0.0-rc.5 dependency --- lucene/licenses/agrona-1.20.0.jar.sha1 | 1 + lucene/licenses/agrona-LICENSE-ASL.txt | 201 ++++++++ lucene/licenses/agrona-NOTICE.txt | 6 + lucene/licenses/commons-math3-3.6.1.jar.sha1 | 1 + lucene/licenses/commons-math3-LICENSE-ASL.txt | 456 ++++++++++++++++++ lucene/licenses/commons-math3-NOTICE.txt | 4 + lucene/licenses/jvector-4.0.0-rc.5.jar.sha1 | 1 + lucene/licenses/jvector-LICENSE-ASL.txt | 202 ++++++++ lucene/licenses/jvector-NOTICE.txt | 6 + lucene/licenses/snakeyaml-2.4.jar.sha1 | 1 + lucene/licenses/snakeyaml-LICENSE-ASL.txt | 176 +++++++ lucene/licenses/snakeyaml-NOTICE.txt | 4 + lucene/sandbox/build.gradle | 13 + versions.lock | 274 ++++++++++- 14 files changed, 1342 insertions(+), 4 deletions(-) create mode 100644 lucene/licenses/agrona-1.20.0.jar.sha1 create mode 100644 lucene/licenses/agrona-LICENSE-ASL.txt create mode 100644 lucene/licenses/agrona-NOTICE.txt create mode 100644 lucene/licenses/commons-math3-3.6.1.jar.sha1 create mode 100644 lucene/licenses/commons-math3-LICENSE-ASL.txt create mode 100644 lucene/licenses/commons-math3-NOTICE.txt create mode 100644 lucene/licenses/jvector-4.0.0-rc.5.jar.sha1 create mode 100644 lucene/licenses/jvector-LICENSE-ASL.txt create mode 100644 lucene/licenses/jvector-NOTICE.txt create mode 100644 lucene/licenses/snakeyaml-2.4.jar.sha1 create mode 100644 lucene/licenses/snakeyaml-LICENSE-ASL.txt create mode 100644 lucene/licenses/snakeyaml-NOTICE.txt diff --git a/lucene/licenses/agrona-1.20.0.jar.sha1 b/lucene/licenses/agrona-1.20.0.jar.sha1 new file mode 100644 index 000000000000..badef8d6e169 --- /dev/null +++ b/lucene/licenses/agrona-1.20.0.jar.sha1 @@ -0,0 +1 @@ +00580b67864f7739bf7778162f418ada69fa3037 diff --git a/lucene/licenses/agrona-LICENSE-ASL.txt b/lucene/licenses/agrona-LICENSE-ASL.txt new file mode 100644 index 000000000000..91d486281cdf --- /dev/null +++ b/lucene/licenses/agrona-LICENSE-ASL.txt @@ -0,0 +1,201 @@ +Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + https://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/lucene/licenses/agrona-NOTICE.txt b/lucene/licenses/agrona-NOTICE.txt new file mode 100644 index 000000000000..795926439ada --- /dev/null +++ b/lucene/licenses/agrona-NOTICE.txt @@ -0,0 +1,6 @@ +This product includes software developed by the Agrona project. +https://github.com/real-logic/agrona + +Copyright © 2014-2023 Real Logic Limited + +Licensed under the Apache License, Version 2.0. 
diff --git a/lucene/licenses/commons-math3-3.6.1.jar.sha1 b/lucene/licenses/commons-math3-3.6.1.jar.sha1 new file mode 100644 index 000000000000..ed9a549757f5 --- /dev/null +++ b/lucene/licenses/commons-math3-3.6.1.jar.sha1 @@ -0,0 +1 @@ +e4ba98f1d4b3c80ec46392f25e094a6a2e58fcbf diff --git a/lucene/licenses/commons-math3-LICENSE-ASL.txt b/lucene/licenses/commons-math3-LICENSE-ASL.txt new file mode 100644 index 000000000000..a08b1c749765 --- /dev/null +++ b/lucene/licenses/commons-math3-LICENSE-ASL.txt @@ -0,0 +1,456 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. 
The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +Apache Commons Math includes the following code provided to the ASF under the +Apache License 2.0: + + - The inverse error function implementation in the Erf class is based on CUDA + code developed by Mike Giles, Oxford-Man Institute of Quantitative Finance, + and published in GPU Computing Gems, volume 2, 2010 (grant received on + March 23th 2013) + - The LinearConstraint, LinearObjectiveFunction, LinearOptimizer, + RelationShip, SimplexSolver and SimplexTableau classes in package + org.apache.commons.math3.optimization.linear include software developed by + Benjamin McCann (http://www.benmccann.com) and distributed with + the following copyright: Copyright 2009 Google Inc. (grant received on + March 16th 2009) + - The class "org.apache.commons.math3.exception.util.LocalizedFormatsTest" which + is an adapted version of "OrekitMessagesTest" test class for the Orekit library + - The "org.apache.commons.math3.analysis.interpolation.HermiteInterpolator" + has been imported from the Orekit space flight dynamics library. + +=============================================================================== + + + +APACHE COMMONS MATH DERIVATIVE WORKS: + +The Apache commons-math library includes a number of subcomponents +whose implementation is derived from original sources written +in C or Fortran. License terms of the original sources +are reproduced below. + +=============================================================================== +For the lmder, lmpar and qrsolv Fortran routine from minpack and translated in +the LevenbergMarquardtOptimizer class in package +org.apache.commons.math3.optimization.general +Original source copyright and license statement: + +Minpack Copyright Notice (1999) University of Chicago. All rights reserved + +Redistribution and use in source and binary forms, with or +without modification, are permitted provided that the +following conditions are met: + +1. Redistributions of source code must retain the above +copyright notice, this list of conditions and the following +disclaimer. + +2. Redistributions in binary form must reproduce the above +copyright notice, this list of conditions and the following +disclaimer in the documentation and/or other materials +provided with the distribution. + +3. 
The end-user documentation included with the +redistribution, if any, must include the following +acknowledgment: + + "This product includes software developed by the + University of Chicago, as Operator of Argonne National + Laboratory. + +Alternately, this acknowledgment may appear in the software +itself, if and wherever such third-party acknowledgments +normally appear. + +4. WARRANTY DISCLAIMER. THE SOFTWARE IS SUPPLIED "AS IS" +WITHOUT WARRANTY OF ANY KIND. THE COPYRIGHT HOLDER, THE +UNITED STATES, THE UNITED STATES DEPARTMENT OF ENERGY, AND +THEIR EMPLOYEES: (1) DISCLAIM ANY WARRANTIES, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO ANY IMPLIED WARRANTIES +OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, TITLE +OR NON-INFRINGEMENT, (2) DO NOT ASSUME ANY LEGAL LIABILITY +OR RESPONSIBILITY FOR THE ACCURACY, COMPLETENESS, OR +USEFULNESS OF THE SOFTWARE, (3) DO NOT REPRESENT THAT USE OF +THE SOFTWARE WOULD NOT INFRINGE PRIVATELY OWNED RIGHTS, (4) +DO NOT WARRANT THAT THE SOFTWARE WILL FUNCTION +UNINTERRUPTED, THAT IT IS ERROR-FREE OR THAT ANY ERRORS WILL +BE CORRECTED. + +5. LIMITATION OF LIABILITY. IN NO EVENT WILL THE COPYRIGHT +HOLDER, THE UNITED STATES, THE UNITED STATES DEPARTMENT OF +ENERGY, OR THEIR EMPLOYEES: BE LIABLE FOR ANY INDIRECT, +INCIDENTAL, CONSEQUENTIAL, SPECIAL OR PUNITIVE DAMAGES OF +ANY KIND OR NATURE, INCLUDING BUT NOT LIMITED TO LOSS OF +PROFITS OR LOSS OF DATA, FOR ANY REASON WHATSOEVER, WHETHER +SUCH LIABILITY IS ASSERTED ON THE BASIS OF CONTRACT, TORT +(INCLUDING NEGLIGENCE OR STRICT LIABILITY), OR OTHERWISE, +EVEN IF ANY OF SAID PARTIES HAS BEEN WARNED OF THE +POSSIBILITY OF SUCH LOSS OR DAMAGES. +=============================================================================== + +Copyright and license statement for the odex Fortran routine developed by +E. Hairer and G. Wanner and translated in GraggBulirschStoerIntegrator class +in package org.apache.commons.math3.ode.nonstiff: + + +Copyright (c) 2004, Ernst Hairer + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +- Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +- Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+=============================================================================== + +Copyright and license statement for the original Mersenne twister C +routines translated in MersenneTwister class in package +org.apache.commons.math3.random: + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================== + +The initial code for shuffling an array (originally in class +"org.apache.commons.math3.random.RandomDataGenerator", now replaced by +a method in class "org.apache.commons.math3.util.MathArrays") was +inspired from the algorithm description provided in +"Algorithms", by Ian Craw and John Pulham (University of Aberdeen 1999). +The textbook (containing a proof that the shuffle is uniformly random) is +available here: + http://citeseerx.ist.psu.edu/viewdoc/download;?doi=10.1.1.173.1898&rep=rep1&type=pdf + +=============================================================================== +License statement for the direction numbers in the resource files for Sobol sequences. + +----------------------------------------------------------------------------- +Licence pertaining to sobol.cc and the accompanying sets of direction numbers + +----------------------------------------------------------------------------- +Copyright (c) 2008, Frances Y. Kuo and Stephen Joe +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + * Neither the names of the copyright holders nor the names of the + University of New South Wales and the University of Waikato + and its contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +=============================================================================== + +The initial commit of package "org.apache.commons.math3.ml.neuralnet" is +an adapted version of code developed in the context of the Data Processing +and Analysis Consortium (DPAC) of the "Gaia" project of the European Space +Agency (ESA). +=============================================================================== + +The initial commit of the class "org.apache.commons.math3.special.BesselJ" is +an adapted version of code translated from the netlib Fortran program, rjbesl +http://www.netlib.org/specfun/rjbesl by R.J. Cody at Argonne National +Laboratory (USA). There is no license or copyright statement included with the +original Fortran sources. +=============================================================================== + + +The BracketFinder (package org.apache.commons.math3.optimization.univariate) +and PowellOptimizer (package org.apache.commons.math3.optimization.general) +classes are based on the Python code in module "optimize.py" (version 0.5) +developed by Travis E. Oliphant for the SciPy library (http://www.scipy.org/) +Copyright © 2003-2009 SciPy Developers. + +SciPy license +Copyright © 2001, 2002 Enthought, Inc. +All rights reserved. + +Copyright © 2003-2013 SciPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Enthought nor the names of the SciPy Developers may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS “AS IS” AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +=============================================================================== diff --git a/lucene/licenses/commons-math3-NOTICE.txt b/lucene/licenses/commons-math3-NOTICE.txt new file mode 100644 index 000000000000..5e2a2f91d48a --- /dev/null +++ b/lucene/licenses/commons-math3-NOTICE.txt @@ -0,0 +1,4 @@ +This product includes software developed by the Apache Commons Math project. +https://commons.apache.org/proper/commons-math/ + +Licensed under the Apache License, Version 2.0. diff --git a/lucene/licenses/jvector-4.0.0-rc.5.jar.sha1 b/lucene/licenses/jvector-4.0.0-rc.5.jar.sha1 new file mode 100644 index 000000000000..ae9459b0c93d --- /dev/null +++ b/lucene/licenses/jvector-4.0.0-rc.5.jar.sha1 @@ -0,0 +1 @@ +799740d5484d589c579ba0b9a65ec887ec542123 diff --git a/lucene/licenses/jvector-LICENSE-ASL.txt b/lucene/licenses/jvector-LICENSE-ASL.txt new file mode 100644 index 000000000000..d64569567334 --- /dev/null +++ b/lucene/licenses/jvector-LICENSE-ASL.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. 
For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/lucene/licenses/jvector-NOTICE.txt b/lucene/licenses/jvector-NOTICE.txt new file mode 100644 index 000000000000..0542e27d7ef7 --- /dev/null +++ b/lucene/licenses/jvector-NOTICE.txt @@ -0,0 +1,6 @@ +This product includes software developed by the JVector project. +https://github.com/jbellis/jvector + +Copyright © 2023 Jonathan Ellis + +Licensed under the Apache License, Version 2.0. 
diff --git a/lucene/licenses/snakeyaml-2.4.jar.sha1 b/lucene/licenses/snakeyaml-2.4.jar.sha1 new file mode 100644 index 000000000000..8739f8c17629 --- /dev/null +++ b/lucene/licenses/snakeyaml-2.4.jar.sha1 @@ -0,0 +1 @@ +e0666b825b796f85521f02360e77f4c92c5a7a07 diff --git a/lucene/licenses/snakeyaml-LICENSE-ASL.txt b/lucene/licenses/snakeyaml-LICENSE-ASL.txt new file mode 100644 index 000000000000..d9a10c0d8e86 --- /dev/null +++ b/lucene/licenses/snakeyaml-LICENSE-ASL.txt @@ -0,0 +1,176 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS diff --git a/lucene/licenses/snakeyaml-NOTICE.txt b/lucene/licenses/snakeyaml-NOTICE.txt new file mode 100644 index 000000000000..c1e6931cc149 --- /dev/null +++ b/lucene/licenses/snakeyaml-NOTICE.txt @@ -0,0 +1,4 @@ +This product includes software developed by the SnakeYAML project. +https://bitbucket.org/snakeyaml/snakeyaml + +Licensed under the Apache License, Version 2.0. 
diff --git a/lucene/sandbox/build.gradle b/lucene/sandbox/build.gradle index daf952f84a8d..6040c651f887 100644 --- a/lucene/sandbox/build.gradle +++ b/lucene/sandbox/build.gradle @@ -16,12 +16,25 @@ */ +plugins { + id 'java-library' +} description = 'Various third party contributions and new ideas' +java { + modularity.inferModulePath = true +} + dependencies { moduleApi project(':lucene:core') moduleApi project(':lucene:queries') moduleApi project(':lucene:facet') moduleTestImplementation project(':lucene:test-framework') + + moduleImplementation('io.github.jbellis:jvector:4.0.0-rc.5') { + exclude group: 'org.slf4j', module: 'slf4j-api' + } + + moduleImplementation 'org.slf4j:slf4j-api:2.0.17' } diff --git a/versions.lock b/versions.lock index ba7fa170cddf..8dfeb4db3f8e 100644 --- a/versions.lock +++ b/versions.lock @@ -6,14 +6,16 @@ "com.ibm.icu:icu4j:78.1" : "47ea4550,refs=6", "commons-codec:commons-codec:1.20.0" : "e6288df0,refs=6", "commons-io:commons-io:2.20.0" : "5ce8cdc6,refs=2", + "io.github.jbellis:jvector:4.0.0-rc.5" : "9f877bb0,refs=7", "io.sgr:s2-geometry-library-java:1.0.0" : "cbc357ab,refs=4", "junit:junit:4.13.2" : "fa9ef26b,refs=4", "net.sf.jopt-simple:jopt-simple:5.0.4" : "85a1e4c6,refs=2", "net.sourceforge.nekohtml:nekohtml:1.9.22" : "5ce8cdc6,refs=2", + "org.agrona:agrona:1.20.0" : "9f877bb0,refs=7", "org.antlr:antlr4-runtime:4.13.2" : "d9953130,refs=4", "org.apache.commons:commons-compress:1.28.0" : "5ce8cdc6,refs=2", "org.apache.commons:commons-lang3:3.18.0" : "5ce8cdc6,refs=2", - "org.apache.commons:commons-math3:3.6.1" : "85a1e4c6,refs=2", + "org.apache.commons:commons-math3:3.6.1" : "dd26014b,refs=8", "org.apache.opennlp:opennlp-tools:2.5.6.1" : "2f760bab,refs=4", "org.carrot2:morfologik-fsa:2.1.9" : "79af844b,refs=4", "org.carrot2:morfologik-polish:2.1.9" : "fe494320,refs=3", @@ -21,7 +23,8 @@ "org.hamcrest:hamcrest:3.0" : "fa9ef26b,refs=4", "org.locationtech.spatial4j:spatial4j:0.8" : "cbc357ab,refs=4", "org.openjdk.jmh:jmh-core:1.37" : "85a1e4c6,refs=2", - "org.slf4j:slf4j-api:2.0.17" : "2f760bab,refs=4", + "org.slf4j:slf4j-api:2.0.17" : "07f0efc6,refs=10", + "org.yaml:snakeyaml:2.4" : "9f877bb0,refs=7", "ua.net.nlp:morfologik-ukrainian-search:4.9.1" : "fe494320,refs=3", "xerces:xercesImpl:2.12.2" : "5ce8cdc6,refs=2" }, @@ -48,16 +51,18 @@ "commons-io:commons-io:2.20.0" : "6f16ff86,refs=2", "io.github.eisop:dataflow-errorprone:3.41.0-eisop1" : "90685606,refs=39", "io.github.java-diff-utils:java-diff-utils:4.12" : "90685606,refs=39", + "io.github.jbellis:jvector:4.0.0-rc.5" : "43dd284b,refs=10", "io.sgr:s2-geometry-library-java:1.0.0" : "1d5a4b2b,refs=4", "javax.inject:javax.inject:1" : "90685606,refs=39", "junit:junit:4.13.2" : "129da9bf,refs=76", "net.bytebuddy:byte-buddy:1.17.7" : "b7ba1646,refs=2", "net.sf.jopt-simple:jopt-simple:5.0.4" : "152d9f78,refs=3", "net.sourceforge.nekohtml:nekohtml:1.9.22" : "6f16ff86,refs=2", + "org.agrona:agrona:1.20.0" : "43dd284b,refs=10", "org.antlr:antlr4-runtime:4.13.2" : "6fbc4021,refs=5", "org.apache.commons:commons-compress:1.28.0" : "6f16ff86,refs=2", "org.apache.commons:commons-lang3:3.18.0" : "6f16ff86,refs=2", - "org.apache.commons:commons-math3:3.6.1" : "152d9f78,refs=3", + "org.apache.commons:commons-math3:3.6.1" : "f0656784,refs=12", "org.apache.opennlp:opennlp-tools:2.5.6.1" : "b91715f0,refs=6", "org.assertj:assertj-core:3.27.6" : "b7ba1646,refs=2", "org.carrot2:morfologik-fsa:2.1.9" : "e077a675,refs=8", @@ -71,12 +76,55 @@ "org.openjdk.jmh:jmh-core:1.37" : "152d9f78,refs=3", 
"org.openjdk.jmh:jmh-generator-annprocess:1.37" : "ecaf1d73,refs=1", "org.pcollections:pcollections:4.0.1" : "90685606,refs=39", - "org.slf4j:slf4j-api:2.0.17" : "b91715f0,refs=6", + "org.slf4j:slf4j-api:2.0.17" : "736bb8da,refs=15", + "org.yaml:snakeyaml:2.4" : "43dd284b,refs=10", "ua.net.nlp:morfologik-ukrainian-search:4.9.1" : "cb00cecf,refs=5", "xerces:xercesImpl:2.12.2" : "6f16ff86,refs=2" } }, "because" : { + "07f0efc6" : [ + { + "configuration" : "runtimeClasspath", + "projectPath" : ":lucene:benchmark" + }, + { + "configuration" : "runtimeClasspath", + "projectPath" : ":lucene:benchmark-jmh" + }, + { + "configuration" : "runtimeClasspath", + "projectPath" : ":lucene:demo" + }, + { + "configuration" : "compileClasspath", + "projectPath" : ":lucene:luke" + }, + { + "configuration" : "runtimeClasspath", + "projectPath" : ":lucene:luke" + }, + { + "configuration" : "runtimeClasspath", + "projectPath" : ":lucene:queryparser" + }, + { + "configuration" : "compileClasspath", + "projectPath" : ":lucene:sandbox" + }, + { + "configuration" : "runtimeClasspath", + "projectPath" : ":lucene:sandbox" + }, + { + "configuration" : "compileClasspath", + "projectPath" : ":lucene:analysis:opennlp" + }, + { + "configuration" : "runtimeClasspath", + "projectPath" : ":lucene:analysis:opennlp" + } + ], "129da9bf" : [ { "configuration" : "testCompileClasspath", @@ -443,6 +491,48 @@ "projectPath" : ":lucene:analysis:opennlp" } ], + "43dd284b" : [ + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:benchmark" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:benchmark-jmh" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:demo" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:highlighter" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:luke" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:memory" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:monitor" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:queryparser" + }, + { + "configuration" : "testCompileClasspath", + "projectPath" : ":lucene:sandbox" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:sandbox" + } + ], "47ea4550" : [ { "configuration" : "compileClasspath", @@ -511,6 +601,68 @@ "projectPath" : ":lucene:queries" } ], + "736bb8da" : [ + { + "configuration" : "testCompileClasspath", + "projectPath" : ":lucene:analysis.tests" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:analysis.tests" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:benchmark" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:benchmark-jmh" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:demo" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:highlighter" + }, + { + "configuration" : "testCompileClasspath", + "projectPath" : ":lucene:luke" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:luke" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:memory" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:monitor" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:queryparser" + }, + { + "configuration" : "testCompileClasspath", + "projectPath" : 
":lucene:sandbox" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:sandbox" + }, + { + "configuration" : "testCompileClasspath", + "projectPath" : ":lucene:analysis:opennlp" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:analysis:opennlp" + } + ], "79af844b" : [ { "configuration" : "compileClasspath", @@ -731,6 +883,36 @@ "projectPath" : ":lucene:analysis:phonetic" } ], + "9f877bb0" : [ + { + "configuration" : "runtimeClasspath", + "projectPath" : ":lucene:benchmark" + }, + { + "configuration" : "runtimeClasspath", + "projectPath" : ":lucene:benchmark-jmh" + }, + { + "configuration" : "runtimeClasspath", + "projectPath" : ":lucene:demo" + }, + { + "configuration" : "runtimeClasspath", + "projectPath" : ":lucene:luke" + }, + { + "configuration" : "runtimeClasspath", + "projectPath" : ":lucene:queryparser" + }, + { + "configuration" : "compileClasspath", + "projectPath" : ":lucene:sandbox" + }, + { + "configuration" : "runtimeClasspath", + "projectPath" : ":lucene:sandbox" + } + ], "b7ba1646" : [ { "configuration" : "testCompileClasspath", @@ -825,6 +1007,40 @@ "projectPath" : ":lucene:expressions" } ], + "dd26014b" : [ + { + "configuration" : "runtimeClasspath", + "projectPath" : ":lucene:benchmark" + }, + { + "configuration" : "compileClasspath", + "projectPath" : ":lucene:benchmark-jmh" + }, + { + "configuration" : "runtimeClasspath", + "projectPath" : ":lucene:benchmark-jmh" + }, + { + "configuration" : "runtimeClasspath", + "projectPath" : ":lucene:demo" + }, + { + "configuration" : "runtimeClasspath", + "projectPath" : ":lucene:luke" + }, + { + "configuration" : "runtimeClasspath", + "projectPath" : ":lucene:queryparser" + }, + { + "configuration" : "compileClasspath", + "projectPath" : ":lucene:sandbox" + }, + { + "configuration" : "runtimeClasspath", + "projectPath" : ":lucene:sandbox" + } + ], "e077a675" : [ { "configuration" : "testCompileClasspath", @@ -891,6 +1107,56 @@ "projectPath" : ":lucene:benchmark-jmh" } ], + "f0656784" : [ + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:benchmark" + }, + { + "configuration" : "annotationProcessor", + "projectPath" : ":lucene:benchmark-jmh" + }, + { + "configuration" : "testCompileClasspath", + "projectPath" : ":lucene:benchmark-jmh" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:benchmark-jmh" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:demo" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:highlighter" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:luke" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:memory" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:monitor" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:queryparser" + }, + { + "configuration" : "testCompileClasspath", + "projectPath" : ":lucene:sandbox" + }, + { + "configuration" : "testRuntimeClasspath", + "projectPath" : ":lucene:sandbox" + } + ], "fa9ef26b" : [ { "configuration" : "compileClasspath", From 7843b82f6cdc1f3075a2ff5f489039f20c2c7c0a Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 15:19:11 +0000 Subject: [PATCH 02/86] [build-fails] Checkout opensearch jvector codec --- lucene/sandbox/src/java/module-info.java | 6 +- .../jvector/ForceMergesOnlyMergePolicy.java | 99 ++ .../codecs/jvector/GraphNodeIdToDocMap.java | 150 ++ 
.../jvector/JVectorFloatVectorValues.java | 121 ++ .../sandbox/codecs/jvector/JVectorFormat.java | 196 +++ .../codecs/jvector/JVectorIndexWriter.java | 105 ++ .../codecs/jvector/JVectorKnnCollector.java | 67 + .../jvector/JVectorKnnFloatVectorQuery.java | 83 + .../jvector/JVectorRandomAccessReader.java | 174 ++ .../sandbox/codecs/jvector/JVectorReader.java | 382 ++++ .../codecs/jvector/JVectorVectorScorer.java | 38 + .../sandbox/codecs/jvector/JVectorWriter.java | 1097 ++++++++++++ .../sandbox/codecs/jvector/package-info.java | 23 + .../org.apache.lucene.codecs.KnnVectorsFormat | 1 + .../codecs/jvector/KNNJVectorTests.java | 1557 +++++++++++++++++ 15 files changed, 4098 insertions(+), 1 deletion(-) create mode 100644 lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/ForceMergesOnlyMergePolicy.java create mode 100644 lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java create mode 100644 lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java create mode 100644 lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java create mode 100644 lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java create mode 100644 lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java create mode 100644 lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java create mode 100644 lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java create mode 100644 lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java create mode 100644 lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java create mode 100644 lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java create mode 100644 lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/package-info.java create mode 100644 lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java diff --git a/lucene/sandbox/src/java/module-info.java b/lucene/sandbox/src/java/module-info.java index ee9be3227de2..ea49d9e2b26a 100644 --- a/lucene/sandbox/src/java/module-info.java +++ b/lucene/sandbox/src/java/module-info.java @@ -16,13 +16,16 @@ */ /** Various third party contributions and new ideas */ +@SuppressWarnings("requires-automatic") module org.apache.lucene.sandbox { requires org.apache.lucene.core; requires org.apache.lucene.queries; requires org.apache.lucene.facet; + requires jvector; exports org.apache.lucene.payloads; exports org.apache.lucene.sandbox.codecs.faiss; + exports org.apache.lucene.sandbox.codecs.jvector; exports org.apache.lucene.sandbox.codecs.idversion; exports org.apache.lucene.sandbox.codecs.quantization; exports org.apache.lucene.sandbox.document; @@ -41,5 +44,6 @@ provides org.apache.lucene.codecs.PostingsFormat with org.apache.lucene.sandbox.codecs.idversion.IDVersionPostingsFormat; provides org.apache.lucene.codecs.KnnVectorsFormat with - org.apache.lucene.sandbox.codecs.faiss.FaissKnnVectorsFormat; + org.apache.lucene.sandbox.codecs.faiss.FaissKnnVectorsFormat, + org.apache.lucene.sandbox.codecs.jvector.JVectorFormat; } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/ForceMergesOnlyMergePolicy.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/ForceMergesOnlyMergePolicy.java new file mode 100644 index 
000000000000..8357a5fcdb46
--- /dev/null
+++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/ForceMergesOnlyMergePolicy.java
@@ -0,0 +1,99 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.knn.index.codec.jvector;
+
+import org.apache.lucene.index.MergePolicy;
+import org.apache.lucene.index.MergeTrigger;
+import org.apache.lucene.index.SegmentCommitInfo;
+import org.apache.lucene.index.SegmentInfos;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * A merge policy that only merges segments if they are forced.
+ * This is useful for testing and benchmarking purposes. Since it can be used for benchmarks, it is placed in the common
+ * codec module.
+ */
+public class ForceMergesOnlyMergePolicy extends MergePolicy {
+    private final boolean useCompoundFile;
+
+    public ForceMergesOnlyMergePolicy() {
+        this(false);
+    }
+
+    public ForceMergesOnlyMergePolicy(boolean useCompoundFile) {
+        super();
+        this.useCompoundFile = useCompoundFile;
+    }
+
+    @Override
+    public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, MergeContext mergeContext)
+        throws IOException {
+        return null;
+    }
+
+    @Override
+    public MergeSpecification findForcedMerges(
+        SegmentInfos segmentInfos,
+        int maxSegmentCount,
+        Map<SegmentCommitInfo, Boolean> segmentsToMerge,
+        MergeContext mergeContext
+    ) throws IOException {
+        // If the segments are already merged (e.g. there's only 1 segment), or
+        // there are no segments eligible for a forced merge, there is nothing to do
+        if (isMerged(segmentInfos, maxSegmentCount, segmentsToMerge, mergeContext)) {
+            return null;
+        }
+
+        final List<SegmentCommitInfo> segments = segmentInfos.asList();
+        MergeSpecification spec = new MergeSpecification();
+
+        final OneMerge merge = new OneMerge(segments);
+        spec.add(merge);
+        return spec;
+    }
+
+    @Override
+    public boolean useCompoundFile(SegmentInfos segmentInfos, SegmentCommitInfo newSegment, MergeContext mergeContext) throws IOException {
+        return useCompoundFile;
+    }
+
+    @Override
+    public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, MergeContext mergeContext) throws IOException {
+        return null;
+    }
+
+    /**
+     * Returns true if the number of segments eligible for merging is less than or equal to the
+     * specified {@code maxNumSegments}.
+ */ + protected boolean isMerged( + SegmentInfos infos, + int maxNumSegments, + Map segmentsToMerge, + MergeContext mergeContext + ) throws IOException { + final int numSegments = infos.size(); + int numToMerge = 0; + SegmentCommitInfo mergeInfo = null; + boolean segmentIsOriginal = false; + for (int i = 0; i < numSegments && numToMerge <= maxNumSegments; i++) { + final SegmentCommitInfo info = infos.info(i); + final Boolean isOriginal = segmentsToMerge.get(info); + if (isOriginal != null) { + segmentIsOriginal = isOriginal; + numToMerge++; + mergeInfo = info; + } + } + + return numToMerge <= maxNumSegments && (numToMerge != 1 || !segmentIsOriginal || isMerged(infos, mergeInfo, mergeContext)); + } +} diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java new file mode 100644 index 000000000000..7fff91e12062 --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java @@ -0,0 +1,150 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.codec.jvector; + +import lombok.extern.log4j.Log4j2; +import org.apache.lucene.index.Sorter; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; + +import java.io.IOException; +import java.util.Arrays; + +/** + * This class represents the mapping from the Lucene document IDs to the jVector ordinals. + * This mapping is necessary because the jVector ordinals can be different from the Lucene document IDs and when lucene documentIDs change after a merge, + * we need to update this mapping to reflect the new document IDs. + * This requires us to know the previous mapping from the previous merge and the new mapping from the current merge. + *
+ * Which means that we also need to persist this mapping to disk to be available across merges. + */ +@Log4j2 +public class GraphNodeIdToDocMap { + private static final int VERSION = 1; + private int[] graphNodeIdsToDocIds; + private int[] docIdsToGraphNodeIds; + + /** + * Constructor that reads the mapping from the index input + * + * @param in The index input + * @throws IOException if an I/O error occurs + */ + public GraphNodeIdToDocMap(IndexInput in) throws IOException { + final int version = in.readInt(); // Read the version + if (version != VERSION) { + throw new IOException("Unsupported version: " + version); + } + int size = in.readVInt(); + int maxDocId = in.readVInt(); + + graphNodeIdsToDocIds = new int[size]; + docIdsToGraphNodeIds = new int[maxDocId]; + for (int ord = 0; ord < size; ord++) { + final int docId = in.readVInt(); + graphNodeIdsToDocIds[ord] = docId; + docIdsToGraphNodeIds[docId] = ord; + } + } + + /** + * Constructor that creates a new mapping between ordinals and docIds + * + * @param graphNodeIdsToDocIds The mapping from ordinals to docIds + */ + public GraphNodeIdToDocMap(int[] graphNodeIdsToDocIds) { + if (graphNodeIdsToDocIds.length == 0) { + this.graphNodeIdsToDocIds = new int[0]; + this.docIdsToGraphNodeIds = new int[0]; + return; + } + this.graphNodeIdsToDocIds = new int[graphNodeIdsToDocIds.length]; + System.arraycopy(graphNodeIdsToDocIds, 0, this.graphNodeIdsToDocIds, 0, graphNodeIdsToDocIds.length); + final int maxDocId = Arrays.stream(graphNodeIdsToDocIds).max().getAsInt(); + final int maxDocs = maxDocId + 1; + // We are going to assume that the number of ordinals is roughly the same as the number of documents in the segment, therefore, + // the mapping will not be sparse. + if (maxDocs < graphNodeIdsToDocIds.length) { + throw new IllegalStateException("Max docs " + maxDocs + " is less than the number of ordinals " + graphNodeIdsToDocIds.length); + } + if (maxDocId > graphNodeIdsToDocIds.length) { + log.warn( + "Max doc id {} is greater than the number of ordinals {}, this implies a lot of deleted documents. Or that some documents are missing vectors. Wasting a lot of memory", + maxDocId, + graphNodeIdsToDocIds.length + ); + } + this.docIdsToGraphNodeIds = new int[maxDocs]; + Arrays.fill(this.docIdsToGraphNodeIds, -1); // -1 means no mapping to ordinal + for (int ord = 0; ord < graphNodeIdsToDocIds.length; ord++) { + this.docIdsToGraphNodeIds[graphNodeIdsToDocIds[ord]] = ord; + } + } + + /** + * Updates the mapping from the Lucene document IDs to the jVector ordinals based on the sort operation. 
(during flush) + * + * @param sortMap The sort map + */ + public void update(Sorter.DocMap sortMap) { + final int[] newGraphNodeIdsToDocIds = new int[graphNodeIdsToDocIds.length]; + final int maxNewDocId = Arrays.stream(graphNodeIdsToDocIds).map(sortMap::oldToNew).max().getAsInt(); + final int maxDocs = maxNewDocId + 1; + if (maxDocs < graphNodeIdsToDocIds.length) { + throw new IllegalStateException("Max docs " + maxDocs + " is less than the number of ordinals " + graphNodeIdsToDocIds.length); + } + final int[] newDocIdsToOrdinals = new int[maxDocs]; + Arrays.fill(newDocIdsToOrdinals, -1); + for (int oldDocId = 0; oldDocId < docIdsToGraphNodeIds.length; oldDocId++) { + if (docIdsToGraphNodeIds[oldDocId] == -1) { + continue; + } + final int newDocId = sortMap.oldToNew(oldDocId); + final int oldOrd = docIdsToGraphNodeIds[oldDocId]; + newDocIdsToOrdinals[newDocId] = oldOrd; + newGraphNodeIdsToDocIds[oldOrd] = newDocId; + } + this.docIdsToGraphNodeIds = newDocIdsToOrdinals; + this.graphNodeIdsToDocIds = newGraphNodeIdsToDocIds; + } + + /** + * Returns the jVector node id for the given Lucene document ID + * + * @param luceneDocId The Lucene document ID + * @return The jVector ordinal + */ + public int getJVectorNodeId(int luceneDocId) { + return docIdsToGraphNodeIds[luceneDocId]; + } + + /** + * Returns the Lucene document ID for the given jVector node id + * + * @param graphNodeId The jVector ordinal + * @return The Lucene document ID + *
+ * NOTE: This method is useful when, for example, we want to remap acceptedDocs bitmap from Lucene to jVector ordinal bitmap filter + */ + public int getLuceneDocId(int graphNodeId) { + return graphNodeIdsToDocIds[graphNodeId]; + } + + /** + * Writes the mapping to the index output + * + * @param out The index output + * @throws IOException if an I/O error occurs + */ + public void toOutput(IndexOutput out) throws IOException { + out.writeInt(VERSION); + out.writeVInt(graphNodeIdsToDocIds.length); + out.writeVInt(docIdsToGraphNodeIds.length); + for (int ord = 0; ord < graphNodeIdsToDocIds.length; ord++) { + out.writeVInt(graphNodeIdsToDocIds[ord]); + } + } +} diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java new file mode 100644 index 000000000000..ce3008a79c29 --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java @@ -0,0 +1,121 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.codec.jvector; + +import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex; +import io.github.jbellis.jvector.util.Bits; +import io.github.jbellis.jvector.vector.VectorSimilarityFunction; +import io.github.jbellis.jvector.vector.VectorizationProvider; +import io.github.jbellis.jvector.vector.types.VectorFloat; +import io.github.jbellis.jvector.vector.types.VectorTypeSupport; +import org.apache.lucene.index.FloatVectorValues; +import org.apache.lucene.search.VectorScorer; + +import java.io.IOException; + +public class JVectorFloatVectorValues extends FloatVectorValues { + private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport(); + + private final OnDiskGraphIndex.View view; + private final VectorSimilarityFunction similarityFunction; + private final int dimension; + private final int size; + private final GraphNodeIdToDocMap graphNodeIdToDocMap; + + public JVectorFloatVectorValues( + OnDiskGraphIndex onDiskGraphIndex, + VectorSimilarityFunction similarityFunction, + GraphNodeIdToDocMap graphNodeIdToDocMap + ) throws IOException { + this.view = onDiskGraphIndex.getView(); + this.dimension = view.dimension(); + this.size = view.size(); + this.similarityFunction = similarityFunction; + this.graphNodeIdToDocMap = graphNodeIdToDocMap; + } + + @Override + public int dimension() { + return dimension; + } + + @Override + public int size() { + return size; + } + + // This allows us to access the vector without copying it to float[] + public VectorFloat vectorFloatValue(int ord) { + return view.getVector(ord); + } + + public DocIndexIterator iterator() { + return new DocIndexIterator() { + private int docId = -1; + private final Bits liveNodes = view.liveNodes(); + + @Override + public long cost() { + return size(); + } + + @Override + public int index() { + return graphNodeIdToDocMap.getJVectorNodeId(docId); + } + + @Override + public int docID() { + return docId; + } + + @Override + public int nextDoc() throws IOException { + // Advance to the next node docId starts from -1 which is why we need to increment docId by 1 "size" + // times + while (docId < size - 1) { + docId++; + if (liveNodes.get(docId)) { + return docId; + } + } + docId = NO_MORE_DOCS; + + return docId; + } + + @Override + public int advance(int target) throws IOException { + return slowAdvance(target); + 
} + }; + } + + @Override + public float[] vectorValue(int i) throws IOException { + try { + final VectorFloat vector = vectorFloatValue(i); + return (float[]) vector.get(); + } catch (Throwable e) { + throw new RuntimeException(e); + } + } + + public VectorFloat vectorValueObject(int i) throws IOException { + return vectorFloatValue(i); + } + + @Override + public FloatVectorValues copy() throws IOException { + return this; + } + + @Override + public VectorScorer scorer(float[] query) throws IOException { + return new JVectorVectorScorer(this, VECTOR_TYPE_SUPPORT.createFloatVector(query), similarityFunction); + } + +} diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java new file mode 100644 index 000000000000..5d25622d3df6 --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java @@ -0,0 +1,196 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.codec.jvector; + +import lombok.extern.log4j.Log4j2; +import org.apache.lucene.codecs.KnnVectorsFormat; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.KnnVectorsWriter; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SegmentWriteState; +import org.opensearch.knn.common.KNNConstants; + +import java.io.IOException; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.ForkJoinWorkerThread; +import java.util.function.Function; + +@Log4j2 +public class JVectorFormat extends KnnVectorsFormat { + public static final String NAME = "JVectorFormat"; + public static final String META_CODEC_NAME = "JVectorVectorsFormatMeta"; + public static final String VECTOR_INDEX_CODEC_NAME = "JVectorVectorsFormatIndex"; + public static final String NEIGHBORS_SCORE_CACHE_CODEC_NAME = "JVectorVectorsFormatNeighborsScoreCache"; + public static final String JVECTOR_FILES_SUFFIX = "jvector"; + public static final String META_EXTENSION = "meta-" + JVECTOR_FILES_SUFFIX; + public static final String VECTOR_INDEX_EXTENSION = "data-" + JVECTOR_FILES_SUFFIX; + public static final String NEIGHBORS_SCORE_CACHE_EXTENSION = "neighbors-score-cache-" + JVECTOR_FILES_SUFFIX; + + public static final int VERSION_START = 0; + public static final int VERSION_CURRENT = VERSION_START; + public static final int DEFAULT_MAX_CONN = 32; + public static final int DEFAULT_BEAM_WIDTH = 100; + // Unfortunately, this can't be managed yet by the OpenSearch ThreadPool because it's not supporting {@link ForkJoinPool} types + public static final ForkJoinPool SIMD_POOL_MERGE = getPhysicalCoreExecutor(); + public static final ForkJoinPool SIMD_POOL_FLUSH = getPhysicalCoreExecutor(); + + private final int maxConn; + private final int beamWidth; + private final Function numberOfSubspacesPerVectorSupplier; // as a function of the original dimension + private final int minBatchSizeForQuantization; + private final float alpha; + private final float neighborOverflow; + private final boolean hierarchyEnabled; + + public JVectorFormat() { + this( + NAME, + DEFAULT_MAX_CONN, + DEFAULT_BEAM_WIDTH, + KNNConstants.DEFAULT_NEIGHBOR_OVERFLOW_VALUE.floatValue(), + KNNConstants.DEFAULT_ALPHA_VALUE.floatValue(), + JVectorFormat::getDefaultNumberOfSubspacesPerVector, + KNNConstants.DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION, + KNNConstants.DEFAULT_HIERARCHY_ENABLED + ); + } + + public 
JVectorFormat(int minBatchSizeForQuantization) { + this( + NAME, + DEFAULT_MAX_CONN, + DEFAULT_BEAM_WIDTH, + KNNConstants.DEFAULT_NEIGHBOR_OVERFLOW_VALUE.floatValue(), + KNNConstants.DEFAULT_ALPHA_VALUE.floatValue(), + JVectorFormat::getDefaultNumberOfSubspacesPerVector, + minBatchSizeForQuantization, + KNNConstants.DEFAULT_HIERARCHY_ENABLED + ); + } + + public JVectorFormat( + int maxConn, + int beamWidth, + float neighborOverflow, + float alpha, + Function numberOfSubspacesPerVectorSupplier, + int minBatchSizeForQuantization, + boolean hierarchyEnabled + ) { + this( + NAME, + maxConn, + beamWidth, + neighborOverflow, + alpha, + numberOfSubspacesPerVectorSupplier, + minBatchSizeForQuantization, + hierarchyEnabled + ); + } + + public JVectorFormat( + String name, + int maxConn, + int beamWidth, + float neighborOverflow, + float alpha, + Function numberOfSubspacesPerVectorSupplier, + int minBatchSizeForQuantization, + boolean hierarchyEnabled + ) { + super(name); + this.maxConn = maxConn; + this.beamWidth = beamWidth; + this.numberOfSubspacesPerVectorSupplier = numberOfSubspacesPerVectorSupplier; + this.minBatchSizeForQuantization = minBatchSizeForQuantization; + this.alpha = alpha; + this.neighborOverflow = neighborOverflow; + this.hierarchyEnabled = hierarchyEnabled; + } + + @Override + public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { + return new JVectorWriter( + state, + maxConn, + beamWidth, + neighborOverflow, + alpha, + numberOfSubspacesPerVectorSupplier, + minBatchSizeForQuantization, + hierarchyEnabled + ); + } + + @Override + public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { + return new JVectorReader(state); + } + + @Override + public int getMaxDimensions(String s) { + // Not a hard limit, but a reasonable default + return 8192; + } + + /** + * This method returns the default number of subspaces per vector for a given original dimension. + * Should be used as a default value for the number of subspaces per vector in case no value is provided. + * + * @param originalDimension original vector dimension + * @return default number of subspaces per vector + */ + public static int getDefaultNumberOfSubspacesPerVector(int originalDimension) { + // the idea here is that higher dimensions compress well, but not so well that we should use fewer bits + // than a lower-dimension vector, which is what you could get with cutoff points to switch between (e.g.) + // D*0.5 and D*0.25. Thus, the following ensures that bytes per vector is strictly increasing with D. 
+ int compressedBytes; + if (originalDimension <= 32) { + // We are compressing from 4-byte floats to single-byte codebook indexes, + // so this represents compression of 4x + // * GloVe-25 needs 25 BPV to achieve good recall + compressedBytes = originalDimension; + } else if (originalDimension <= 64) { + // * GloVe-50 performs fine at 25 + compressedBytes = 32; + } else if (originalDimension <= 200) { + // * GloVe-100 and -200 perform well at 50 and 100 BPV, respectively + compressedBytes = (int) (originalDimension * 0.5); + } else if (originalDimension <= 400) { + // * NYTimes-256 actually performs fine at 64 BPV but we'll be conservative + // since we don't want BPV to decrease + compressedBytes = 100; + } else if (originalDimension <= 768) { + // allow BPV to increase linearly up to 192 + compressedBytes = (int) (originalDimension * 0.25); + } else if (originalDimension <= 1536) { + // * ada002 vectors have good recall even at 192 BPV = compression of 32x + compressedBytes = 192; + } else { + // We have not tested recall with larger vectors than this, let's let it increase linearly + compressedBytes = (int) (originalDimension * 0.125); + } + return compressedBytes; + } + + public static ForkJoinPool getPhysicalCoreExecutor() { + final int estimatedPhysicalCoreCount = Integer.getInteger( + "jvector.physical_core_count", + Math.max(1, Runtime.getRuntime().availableProcessors() / 2) + ); + assert estimatedPhysicalCoreCount > 0 && estimatedPhysicalCoreCount <= Runtime.getRuntime().availableProcessors() + : "Invalid core count: " + estimatedPhysicalCoreCount; + final ForkJoinPool.ForkJoinWorkerThreadFactory factory = pool -> { + ForkJoinWorkerThread thread = ForkJoinPool.defaultForkJoinWorkerThreadFactory.newThread(pool); + thread.setPriority(Thread.NORM_PRIORITY - 2); + return thread; + }; + + log.info("Creating SIMD ForkJoinPool with {} physical cores for JVector SIMD operations", estimatedPhysicalCoreCount); + return new ForkJoinPool(estimatedPhysicalCoreCount, factory, null, true); + } +} diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java new file mode 100644 index 000000000000..b01b4c8db1bb --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java @@ -0,0 +1,105 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.codec.jvector; + +import io.github.jbellis.jvector.disk.IndexWriter; +import lombok.extern.log4j.Log4j2; +import org.apache.lucene.store.IndexOutput; + +import java.io.IOException; + +/** + * JVectorRandomAccessWriter is a wrapper around IndexOutput that implements RandomAccessWriter. + * Note: This is not thread safe! 
+ */ +@Log4j2 +public class JVectorIndexWriter implements IndexWriter { + private final IndexOutput indexOutputDelegate; + + public JVectorIndexWriter(IndexOutput indexOutputDelegate) { + this.indexOutputDelegate = indexOutputDelegate; + } + + @Override + public long position() throws IOException { + return indexOutputDelegate.getFilePointer(); + } + + @Override + public void close() throws IOException { + indexOutputDelegate.close(); + } + + @Override + public void write(int b) throws IOException { + indexOutputDelegate.writeByte((byte) b); + } + + @Override + public void write(byte[] b) throws IOException { + indexOutputDelegate.writeBytes(b, 0, b.length); + } + + @Override + public void write(byte[] b, int off, int len) throws IOException { + indexOutputDelegate.writeBytes(b, off, len); + } + + @Override + public void writeBoolean(boolean v) throws IOException { + indexOutputDelegate.writeByte((byte) (v ? 1 : 0)); + } + + @Override + public void writeByte(int v) throws IOException { + indexOutputDelegate.writeByte((byte) v); + } + + @Override + public void writeShort(int v) throws IOException { + indexOutputDelegate.writeShort((short) v); + } + + @Override + public void writeChar(int v) throws IOException { + throw new UnsupportedOperationException("JVectorRandomAccessWriter does not support writing chars"); + } + + @Override + public void writeInt(int v) throws IOException { + indexOutputDelegate.writeInt(v); + } + + @Override + public void writeLong(long v) throws IOException { + indexOutputDelegate.writeLong(v); + } + + @Override + public void writeFloat(float v) throws IOException { + indexOutputDelegate.writeInt(Float.floatToIntBits(v)); + } + + @Override + public void writeDouble(double v) throws IOException { + writeLong(Double.doubleToLongBits(v)); + } + + @Override + public void writeBytes(String s) throws IOException { + throw new UnsupportedOperationException("JVectorIndexWriter does not support writing String as bytes"); + } + + @Override + public void writeChars(String s) throws IOException { + throw new UnsupportedOperationException("JVectorIndexWriter does not support writing chars"); + } + + @Override + public void writeUTF(String s) throws IOException { + throw new UnsupportedOperationException("JVectorIndexWriter does not support writing UTF strings"); + } +} diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java new file mode 100644 index 000000000000..573726f5f19a --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java @@ -0,0 +1,67 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ +package org.opensearch.knn.index.codec.jvector; + +import lombok.Value; +import org.apache.lucene.search.KnnCollector; +import org.apache.lucene.search.knn.KnnSearchStrategy; +import org.apache.lucene.search.TopDocs; + +/** + * Wrapper class for KnnCollector that provides passing of additional parameters specific for JVector. 
+ */ +@Value +public class JVectorKnnCollector implements KnnCollector { + KnnCollector delegate; + float threshold; + float rerankFloor; + int overQueryFactor; + boolean usePruning; + + @Override + public boolean earlyTerminated() { + return delegate.earlyTerminated(); + } + + @Override + public void incVisitedCount(int count) { + delegate.incVisitedCount(count); + } + + @Override + public long visitedCount() { + return delegate.visitedCount(); + } + + @Override + public long visitLimit() { + return delegate.visitLimit(); + } + + @Override + public int k() { + return delegate.k(); + } + + @Override + public boolean collect(int docId, float similarity) { + return delegate.collect(docId, similarity); + } + + @Override + public float minCompetitiveSimilarity() { + return delegate.minCompetitiveSimilarity(); + } + + @Override + public TopDocs topDocs() { + return delegate.topDocs(); + } + + @Override + public KnnSearchStrategy getSearchStrategy() { + return delegate.getSearchStrategy(); + } +} diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java new file mode 100644 index 000000000000..922a7dcd55b1 --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java @@ -0,0 +1,83 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ +package org.opensearch.knn.index.codec.jvector; + +import org.apache.lucene.index.FloatVectorValues; +import org.apache.lucene.index.LeafReader; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.*; +import org.apache.lucene.search.knn.KnnCollectorManager; +import org.apache.lucene.search.knn.KnnSearchStrategy; +import org.apache.lucene.util.Bits; + +import java.io.IOException; + +/** + * {@link KnnFloatVectorQuery} that uses jVector to perform the search. + * We use this wrapper simply because we can't pass jVector specific parameters with the upstream {@link KnnFloatVectorQuery}. 
+ */ +public class JVectorKnnFloatVectorQuery extends KnnFloatVectorQuery { + private static final TopDocs NO_RESULTS = TopDocsCollector.EMPTY_TOPDOCS; + private final int overQueryFactor; + private final float threshold; + private final float rerankFloor; + private final boolean usePruning; + + public JVectorKnnFloatVectorQuery( + String field, + float[] target, + int k, + int overQueryFactor, + float threshold, + float rerankFloor, + boolean usePruning + ) { + super(field, target, k); + this.overQueryFactor = overQueryFactor; + this.threshold = threshold; + this.rerankFloor = rerankFloor; + this.usePruning = usePruning; + } + + public JVectorKnnFloatVectorQuery( + String field, + float[] target, + int k, + Query filter, + int overQueryFactor, + float threshold, + float rerankFloor, + boolean usePruning + ) { + super(field, target, k, filter); + this.overQueryFactor = overQueryFactor; + this.threshold = threshold; + this.rerankFloor = rerankFloor; + this.usePruning = usePruning; + } + + @Override + protected TopDocs approximateSearch( + LeafReaderContext context, + Bits acceptDocs, + int visitedLimit, + KnnCollectorManager knnCollectorManager + ) throws IOException { + final KnnCollector delegateCollector = knnCollectorManager.newCollector(visitedLimit, KnnSearchStrategy.Hnsw.DEFAULT, context); + final KnnCollector knnCollector = new JVectorKnnCollector(delegateCollector, threshold, rerankFloor, overQueryFactor, usePruning); + LeafReader reader = context.reader(); + FloatVectorValues floatVectorValues = reader.getFloatVectorValues(field); + if (floatVectorValues == null) { + FloatVectorValues.checkField(reader, field); + return NO_RESULTS; + } + if (Math.min(knnCollector.k(), floatVectorValues.size()) == 0) { + return NO_RESULTS; + } + reader.searchNearestVectors(field, getTargetCopy(), knnCollector, acceptDocs); + TopDocs results = knnCollector.topDocs(); + return results != null ? 
results : NO_RESULTS; + } +} diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java new file mode 100644 index 000000000000..c3b823010c6d --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java @@ -0,0 +1,174 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.codec.jvector; + +import io.github.jbellis.jvector.disk.RandomAccessReader; +import io.github.jbellis.jvector.disk.ReaderSupplier; +import lombok.extern.log4j.Log4j2; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.IOUtils; + +import java.io.EOFException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.FloatBuffer; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; + +@Log4j2 +public class JVectorRandomAccessReader implements RandomAccessReader { + private final byte[] internalBuffer = new byte[Long.BYTES]; + private final byte[] internalFloatBuffer = new byte[Float.BYTES]; + private final IndexInput indexInputDelegate; + private volatile boolean closed = false; + + public JVectorRandomAccessReader(IndexInput indexInputDelegate) { + this.indexInputDelegate = indexInputDelegate; + } + + @Override + public void seek(long offset) throws IOException { + indexInputDelegate.seek(offset); + } + + @Override + public long getPosition() throws IOException { + return indexInputDelegate.getFilePointer(); + } + + @Override + public int readInt() throws IOException { + return indexInputDelegate.readInt(); + } + + @Override + public float readFloat() throws IOException { + return Float.intBitsToFloat(indexInputDelegate.readInt()); + } + + // TODO: bring back to override when upgrading jVector again + // @Override + public long readLong() throws IOException { + return indexInputDelegate.readLong(); + } + + @Override + public void readFully(byte[] bytes) throws IOException { + indexInputDelegate.readBytes(bytes, 0, bytes.length); + } + + @Override + public void readFully(ByteBuffer buffer) throws IOException { + // validate that the requested bytes actually exist ---- + long remainingInFile = indexInputDelegate.length() - indexInputDelegate.getFilePointer(); + if (buffer.remaining() > remainingInFile) { + throw new EOFException("Requested " + buffer.remaining() + " bytes but only " + remainingInFile + " available"); + } + + // Heap buffers with a backing array can be filled in one call ---- + if (buffer.hasArray()) { + int off = buffer.arrayOffset() + buffer.position(); + int len = buffer.remaining(); + indexInputDelegate.readBytes(buffer.array(), off, len); + buffer.position(buffer.limit()); // advance fully + return; + } + + // Direct / non-array buffers: copy in reasonable chunks ---- + while (buffer.hasRemaining()) { + final int bytesToRead = Math.min(buffer.remaining(), Long.BYTES); + indexInputDelegate.readBytes(this.internalBuffer, 0, bytesToRead); + buffer.put(this.internalBuffer, 0, bytesToRead); + } + } + + @Override + public void readFully(long[] vector) throws IOException { + for (int i = 0; i < vector.length; i++) { + vector[i] = readLong(); + } + } + + @Override + public void read(int[] ints, int offset, int count) throws IOException { + for (int i = 0; i < count; i++) { + ints[offset + i] = readInt(); + } + } + + @Override + public void read(float[] 
floats, int offset, int count) throws IOException { + final ByteBuffer byteBuffer = ByteBuffer.allocate(Float.BYTES * count); + indexInputDelegate.readBytes(byteBuffer.array(), offset, Float.BYTES * count); + FloatBuffer buffer = byteBuffer.asFloatBuffer(); + buffer.get(floats, offset, count); + } + + @Override + public void close() throws IOException { + log.debug("Closing JVectorRandomAccessReader for file: {}", indexInputDelegate); + this.closed = true; + // no need to really close the index input delegate since it is a clone + log.debug("Closed JVectorRandomAccessReader for file: {}", indexInputDelegate); + } + + @Override + public long length() throws IOException { + return indexInputDelegate.length(); + } + + /** + * Supplies readers which are actually slices of the original IndexInput. + * We will vend out slices in order for us to easily find the footer of the jVector graph index. + * This is useful because our logic that reads the graph that the footer is always at {@link IndexInput#length()} of the slice. + * Which is how {@link io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex#load(ReaderSupplier, long)} is working behind the scenes. + * The header offset, on the other hand, is flexible because we can provide it as a parameter to {@link io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex#load(ReaderSupplier, long)} + */ + public static class Supplier implements ReaderSupplier { + private final AtomicInteger readerCount = new AtomicInteger(0); + private final IndexInput currentInput; + private final long sliceStartOffset; + private final long sliceLength; + private final ConcurrentHashMap readers = new ConcurrentHashMap<>(); + + public Supplier(IndexInput indexInput) throws IOException { + this(indexInput, indexInput.getFilePointer(), indexInput.length() - indexInput.getFilePointer()); + } + + public Supplier(IndexInput indexInput, long sliceStartOffset, long sliceLength) throws IOException { + this.currentInput = indexInput; + this.sliceStartOffset = sliceStartOffset; + this.sliceLength = sliceLength; + } + + @Override + public RandomAccessReader get() throws IOException { + synchronized (this) { + final IndexInput input = currentInput.slice("Input Slice for the jVector graph or PQ", sliceStartOffset, sliceLength) + .clone(); + + var reader = new JVectorRandomAccessReader(input); + int readerId = readerCount.getAndIncrement(); + readers.put(readerId, reader); + return reader; + } + + } + + @Override + public void close() throws IOException { + // Close source of all cloned inputs + IOUtils.closeWhileHandlingException(currentInput); + + // Close all readers + for (RandomAccessReader reader : readers.values()) { + IOUtils.closeWhileHandlingException(reader::close); + } + readers.clear(); + readerCount.set(0); + } + } +} diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java new file mode 100644 index 000000000000..3c8aa4622000 --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -0,0 +1,382 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.codec.jvector; + +import io.github.jbellis.jvector.disk.RandomAccessReader; +import io.github.jbellis.jvector.disk.ReaderSupplier; +import io.github.jbellis.jvector.graph.GraphSearcher; +import io.github.jbellis.jvector.graph.SearchResult; +import 
io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex; +import io.github.jbellis.jvector.graph.similarity.DefaultSearchScoreProvider; +import io.github.jbellis.jvector.graph.similarity.ScoreFunction; +import io.github.jbellis.jvector.graph.similarity.SearchScoreProvider; +import io.github.jbellis.jvector.quantization.PQVectors; +import io.github.jbellis.jvector.quantization.ProductQuantization; +import io.github.jbellis.jvector.vector.VectorizationProvider; +import io.github.jbellis.jvector.vector.types.VectorFloat; +import io.github.jbellis.jvector.vector.types.VectorTypeSupport; +import lombok.extern.log4j.Log4j2; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.index.*; +import org.apache.lucene.search.KnnCollector; +import org.apache.lucene.store.*; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.IOUtils; +import io.github.jbellis.jvector.vector.VectorSimilarityFunction; +import org.opensearch.knn.common.KNNConstants; +import org.opensearch.knn.plugin.stats.KNNCounter; + +import java.io.Closeable; +import java.io.IOException; + +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +@Log4j2 +public class JVectorReader extends KnnVectorsReader { + private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport(); + + private final FieldInfos fieldInfos; + private final String baseDataFileName; + // Maps field name to field entries + private final Map fieldEntryMap = new HashMap<>(1); + private final Directory directory; + private final SegmentReadState state; + + public JVectorReader(SegmentReadState state) throws IOException { + this.state = state; + this.fieldInfos = state.fieldInfos; + this.baseDataFileName = state.segmentInfo.name + "_" + state.segmentSuffix; + final String metaFileName = IndexFileNames.segmentFileName( + state.segmentInfo.name, + state.segmentSuffix, + JVectorFormat.META_EXTENSION + ); + this.directory = state.directory; + boolean success = false; + try (ChecksumIndexInput meta = state.directory.openChecksumInput(metaFileName)) { + CodecUtil.checkIndexHeader( + meta, + JVectorFormat.META_CODEC_NAME, + JVectorFormat.VERSION_START, + JVectorFormat.VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix + ); + readFields(meta); + CodecUtil.checkFooter(meta); + + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(this); + } + } + } + + @Override + public void checkIntegrity() throws IOException { + for (FieldEntry fieldEntry : fieldEntryMap.values()) { + // Verify the vector index file + try (var indexInput = state.directory.openInput(fieldEntry.vectorIndexFieldDataFileName, IOContext.READONCE)) { + CodecUtil.checksumEntireFile(indexInput); + } + + // Verify the neighbors score cache file + try (var indexInput = state.directory.openInput(fieldEntry.neighborsScoreCacheIndexFieldFileName, IOContext.READONCE)) { + CodecUtil.checksumEntireFile(indexInput); + } + } + } + + @Override + public FloatVectorValues getFloatVectorValues(String field) throws IOException { + final FieldEntry fieldEntry = fieldEntryMap.get(field); + return new JVectorFloatVectorValues(fieldEntry.index, fieldEntry.similarityFunction, fieldEntry.graphNodeIdToDocMap); + } + + @Override + public ByteVectorValues getByteVectorValues(String field) throws IOException { + /** + * Byte vector values are not supported in jVector library. Instead use PQ. 
+ */ + return null; + } + + public Optional getProductQuantizationForField(String field) throws IOException { + final FieldEntry fieldEntry = fieldEntryMap.get(field); + if (fieldEntry.pqVectors == null) { + return Optional.empty(); + } + + return Optional.of(fieldEntry.pqVectors.getCompressor()); + } + + public RandomAccessReader getNeighborsScoreCacheForField(String field) throws IOException { + final FieldEntry fieldEntry = fieldEntryMap.get(field); + return fieldEntry.neighborsScoreCacheIndexReaderSupplier.get(); + } + + public OnDiskGraphIndex getOnDiskGraphIndex(String field) throws IOException { + return fieldEntryMap.get(field).index; + } + + @Override + public void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + final OnDiskGraphIndex index = fieldEntryMap.get(field).index; + final JVectorKnnCollector jvectorKnnCollector; + if (knnCollector instanceof JVectorKnnCollector) { + jvectorKnnCollector = (JVectorKnnCollector) knnCollector; + } else { + log.warn("KnnCollector must be of type JVectorKnnCollector, for now we will re-wrap it but this is not ideal"); + jvectorKnnCollector = new JVectorKnnCollector( + knnCollector, + KNNConstants.DEFAULT_QUERY_SIMILARITY_THRESHOLD.floatValue(), + KNNConstants.DEFAULT_QUERY_RERANK_FLOOR.floatValue(), + KNNConstants.DEFAULT_OVER_QUERY_FACTOR, + KNNConstants.DEFAULT_QUERY_USE_PRUNING + ); + + } + + // search for a random vector using a GraphSearcher and SearchScoreProvider + VectorFloat q = VECTOR_TYPE_SUPPORT.createFloatVector(target); + final SearchScoreProvider ssp; + + try (var view = index.getView()) { + final long graphSearchStart = System.currentTimeMillis(); + if (fieldEntryMap.get(field).pqVectors != null) { // Quantized, use the precomputed score function + final PQVectors pqVectors = fieldEntryMap.get(field).pqVectors; + // SearchScoreProvider that does a first pass with the loaded-in-memory PQVectors, + // then reranks with the exact vectors that are stored on disk in the index + ScoreFunction.ApproximateScoreFunction asf = pqVectors.precomputedScoreFunctionFor( + q, + fieldEntryMap.get(field).similarityFunction + ); + ScoreFunction.ExactScoreFunction reranker = view.rerankerFor(q, fieldEntryMap.get(field).similarityFunction); + ssp = new DefaultSearchScoreProvider(asf, reranker); + } else { // Not quantized, used typical searcher + ssp = DefaultSearchScoreProvider.exact(q, fieldEntryMap.get(field).similarityFunction, view); + } + final GraphNodeIdToDocMap jvectorLuceneDocMap = fieldEntryMap.get(field).graphNodeIdToDocMap; + // Convert the acceptDocs bitmap from Lucene to jVector ordinal bitmap filter + // Logic works as follows: if acceptDocs is null, we accept all ordinals. Otherwise, we check if the jVector ordinal has a + // corresponding Lucene doc ID accepted by acceptDocs filter. 
+ io.github.jbellis.jvector.util.Bits compatibleBits = ord -> acceptDocs == null + || acceptDocs.get(jvectorLuceneDocMap.getLuceneDocId(ord)); + + try (var graphSearcher = new GraphSearcher(index)) { + final var searchResults = graphSearcher.search( + ssp, + jvectorKnnCollector.k(), + jvectorKnnCollector.k() * jvectorKnnCollector.getOverQueryFactor(), + jvectorKnnCollector.getThreshold(), + jvectorKnnCollector.getRerankFloor(), + compatibleBits + ); + for (SearchResult.NodeScore ns : searchResults.getNodes()) { + jvectorKnnCollector.collect(jvectorLuceneDocMap.getLuceneDocId(ns.node), ns.score); + } + final long graphSearchEnd = System.currentTimeMillis(); + final long searchTime = graphSearchEnd - graphSearchStart; + log.debug("Search (including acquiring view) took {} ms", searchTime); + + // Collect the below metrics about the search and somehow wire this back to {@link @KNNStats} + final int visitedNodesCount = searchResults.getVisitedCount(); + final int rerankedCount = searchResults.getRerankedCount(); + + final int expandedCount = searchResults.getExpandedCount(); + final int expandedBaseLayerCount = searchResults.getExpandedCountBaseLayer(); + + KNNCounter.KNN_QUERY_VISITED_NODES.add(visitedNodesCount); + KNNCounter.KNN_QUERY_RERANKED_COUNT.add(rerankedCount); + KNNCounter.KNN_QUERY_EXPANDED_NODES.add(expandedCount); + KNNCounter.KNN_QUERY_EXPANDED_BASE_LAYER_NODES.add(expandedBaseLayerCount); + KNNCounter.KNN_QUERY_GRAPH_SEARCH_TIME.add(searchTime); + log.debug( + "rerankedCount: {}, visitedNodesCount: {}, expandedCount: {}, expandedBaseLayerCount: {}", + rerankedCount, + visitedNodesCount, + expandedCount, + expandedBaseLayerCount + ); + + } + } + } + + @Override + public void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { + // TODO: implement this + throw new UnsupportedOperationException("Byte vector search is not supported yet with jVector"); + } + + @Override + public void close() throws IOException { + for (FieldEntry fieldEntry : fieldEntryMap.values()) { + IOUtils.close(fieldEntry); + } + fieldEntryMap.clear(); + } + + private void readFields(ChecksumIndexInput meta) throws IOException { + for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) { + final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber); // read field number + JVectorWriter.VectorIndexFieldMetadata vectorIndexFieldMetadata = new JVectorWriter.VectorIndexFieldMetadata(meta); + assert fieldInfo.number == vectorIndexFieldMetadata.getFieldNumber(); + fieldEntryMap.put(fieldInfo.name, new FieldEntry(fieldInfo, vectorIndexFieldMetadata)); + } + } + + class FieldEntry implements Closeable { + private final FieldInfo fieldInfo; + private final VectorEncoding vectorEncoding; + private final VectorSimilarityFunction similarityFunction; + private final int dimension; + private final long vectorIndexOffset; + private final long vectorIndexLength; + private final long pqCodebooksAndVectorsLength; + private final long pqCodebooksAndVectorsOffset; + private final String vectorIndexFieldDataFileName; + private final String neighborsScoreCacheIndexFieldFileName; + private final GraphNodeIdToDocMap graphNodeIdToDocMap; + private final ReaderSupplier indexReaderSupplier; + private final ReaderSupplier pqCodebooksReaderSupplier; + private final ReaderSupplier neighborsScoreCacheIndexReaderSupplier; + private final OnDiskGraphIndex index; + private final PQVectors pqVectors; // The product quantized vectors with their codebooks + + 
public FieldEntry(FieldInfo fieldInfo, JVectorWriter.VectorIndexFieldMetadata vectorIndexFieldMetadata) throws IOException { + this.fieldInfo = fieldInfo; + this.similarityFunction = VectorSimilarityMapper.ordToDistFunc( + vectorIndexFieldMetadata.getVectorSimilarityFunction().ordinal() + ); + this.vectorEncoding = vectorIndexFieldMetadata.getVectorEncoding(); + this.vectorIndexOffset = vectorIndexFieldMetadata.getVectorIndexOffset(); + this.vectorIndexLength = vectorIndexFieldMetadata.getVectorIndexLength(); + this.pqCodebooksAndVectorsLength = vectorIndexFieldMetadata.getPqCodebooksAndVectorsLength(); + this.pqCodebooksAndVectorsOffset = vectorIndexFieldMetadata.getPqCodebooksAndVectorsOffset(); + this.dimension = vectorIndexFieldMetadata.getVectorDimension(); + this.graphNodeIdToDocMap = vectorIndexFieldMetadata.getGraphNodeIdToDocMap(); + + this.vectorIndexFieldDataFileName = baseDataFileName + "_" + fieldInfo.name + "." + JVectorFormat.VECTOR_INDEX_EXTENSION; + this.neighborsScoreCacheIndexFieldFileName = baseDataFileName + + "_" + + fieldInfo.name + + "." + + JVectorFormat.NEIGHBORS_SCORE_CACHE_EXTENSION; + + // For the slice we would like to include the Lucene header, unfortunately, we have to do this because jVector use global + // offsets instead of local offsets + final long sliceLength = vectorIndexLength + CodecUtil.indexHeaderLength( + JVectorFormat.VECTOR_INDEX_CODEC_NAME, + state.segmentSuffix + ); + // Load the graph index + this.indexReaderSupplier = new JVectorRandomAccessReader.Supplier( + directory.openInput(vectorIndexFieldDataFileName, state.context), + 0, + sliceLength + ); + this.index = OnDiskGraphIndex.load(indexReaderSupplier, vectorIndexOffset); + + // If quantized load the compressed product quantized vectors with their codebooks + if (pqCodebooksAndVectorsLength > 0) { + assert pqCodebooksAndVectorsOffset > 0; + if (pqCodebooksAndVectorsOffset < vectorIndexOffset) { + throw new IllegalArgumentException("pqCodebooksAndVectorsOffset must be greater than vectorIndexOffset"); + } + this.pqCodebooksReaderSupplier = new JVectorRandomAccessReader.Supplier( + directory.openInput(vectorIndexFieldDataFileName, IOContext.READONCE), + pqCodebooksAndVectorsOffset, + pqCodebooksAndVectorsLength + ); + log.debug( + "Loading PQ codebooks and vectors for field {}, with numbers of vectors: {}", + fieldInfo.name, + state.segmentInfo.maxDoc() + ); + try (final var randomAccessReader = pqCodebooksReaderSupplier.get()) { + this.pqVectors = PQVectors.load(randomAccessReader); + } + } else { + this.pqCodebooksReaderSupplier = null; + this.pqVectors = null; + } + + final IndexInput indexInput = directory.openInput(neighborsScoreCacheIndexFieldFileName, state.context); + CodecUtil.readIndexHeader(indexInput); + + this.neighborsScoreCacheIndexReaderSupplier = new JVectorRandomAccessReader.Supplier(indexInput); + } + + @Override + public void close() throws IOException { + if (indexReaderSupplier != null) { + IOUtils.close(indexReaderSupplier::close); + } + if (pqCodebooksReaderSupplier != null) { + IOUtils.close(pqCodebooksReaderSupplier::close); + } + if (neighborsScoreCacheIndexReaderSupplier != null) { + IOUtils.close(neighborsScoreCacheIndexReaderSupplier::close); + } + } + } + + /** + * Utility class to map between Lucene and jVector similarity functions and metadata ordinals. 
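+     * For example, with the ordering defined below, {@code distFuncToOrd(org.apache.lucene.index.VectorSimilarityFunction.COSINE)}
+     * returns 2 and {@code ordToDistFunc(2)} returns the corresponding jVector COSINE function.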
+ */ + public static class VectorSimilarityMapper { + /** + List of vector similarity functions supported by jVector library + The similarity functions orders matter in this list because it is later used to resolve the similarity function by ordinal. + */ + public static final List JVECTOR_SUPPORTED_SIMILARITY_FUNCTIONS = List.of( + VectorSimilarityFunction.EUCLIDEAN, + VectorSimilarityFunction.DOT_PRODUCT, + VectorSimilarityFunction.COSINE + ); + + public static final Map LUCENE_TO_JVECTOR_MAP = Map.of( + org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN, + VectorSimilarityFunction.EUCLIDEAN, + org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT, + VectorSimilarityFunction.DOT_PRODUCT, + org.apache.lucene.index.VectorSimilarityFunction.COSINE, + VectorSimilarityFunction.COSINE + ); + + public static int distFuncToOrd(org.apache.lucene.index.VectorSimilarityFunction func) { + if (LUCENE_TO_JVECTOR_MAP.containsKey(func)) { + return JVECTOR_SUPPORTED_SIMILARITY_FUNCTIONS.indexOf(LUCENE_TO_JVECTOR_MAP.get(func)); + } + + throw new IllegalArgumentException("invalid distance function: " + func); + } + + public static VectorSimilarityFunction ordToDistFunc(int ord) { + return JVECTOR_SUPPORTED_SIMILARITY_FUNCTIONS.get(ord); + } + + public static org.apache.lucene.index.VectorSimilarityFunction ordToLuceneDistFunc(int ord) { + if (ord < 0 || ord >= JVECTOR_SUPPORTED_SIMILARITY_FUNCTIONS.size()) { + throw new IllegalArgumentException("Invalid ordinal: " + ord); + } + VectorSimilarityFunction jvectorFunc = JVECTOR_SUPPORTED_SIMILARITY_FUNCTIONS.get(ord); + for (Map.Entry entry : LUCENE_TO_JVECTOR_MAP + .entrySet()) { + if (entry.getValue().equals(jvectorFunc)) { + return entry.getKey(); + } + } + throw new IllegalStateException("No matching Lucene VectorSimilarityFunction found for ordinal: " + ord); + } + } +} diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java new file mode 100644 index 000000000000..e27b168b6362 --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java @@ -0,0 +1,38 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.codec.jvector; + +import io.github.jbellis.jvector.vector.VectorSimilarityFunction; +import io.github.jbellis.jvector.vector.types.VectorFloat; +import org.apache.lucene.index.KnnVectorValues; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.VectorScorer; + +import java.io.IOException; + +public class JVectorVectorScorer implements VectorScorer { + private final JVectorFloatVectorValues floatVectorValues; + private final KnnVectorValues.DocIndexIterator docIndexIterator; + private final VectorFloat target; + private final VectorSimilarityFunction similarityFunction; + + public JVectorVectorScorer(JVectorFloatVectorValues vectorValues, VectorFloat target, VectorSimilarityFunction similarityFunction) { + this.floatVectorValues = vectorValues; + this.docIndexIterator = floatVectorValues.iterator(); + this.target = target; + this.similarityFunction = similarityFunction; + } + + @Override + public float score() throws IOException { + return similarityFunction.compare(target, floatVectorValues.vectorFloatValue(docIndexIterator.index())); + } + + @Override + public DocIdSetIterator iterator() { + return docIndexIterator; + } +} diff --git 
a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java new file mode 100644 index 000000000000..434e08a6964e --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -0,0 +1,1097 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.codec.jvector; + +import io.github.jbellis.jvector.disk.RandomAccessReader; +import io.github.jbellis.jvector.graph.*; +import io.github.jbellis.jvector.graph.disk.*; +import io.github.jbellis.jvector.graph.disk.feature.Feature; +import io.github.jbellis.jvector.graph.disk.feature.FeatureId; +import io.github.jbellis.jvector.graph.disk.feature.InlineVectors; +import io.github.jbellis.jvector.graph.similarity.BuildScoreProvider; +import io.github.jbellis.jvector.quantization.PQVectors; +import io.github.jbellis.jvector.quantization.ProductQuantization; +import io.github.jbellis.jvector.vector.VectorizationProvider; +import io.github.jbellis.jvector.vector.types.VectorFloat; +import io.github.jbellis.jvector.vector.types.VectorTypeSupport; +import lombok.AllArgsConstructor; +import lombok.Builder; +import lombok.Getter; +import lombok.Value; +import lombok.extern.log4j.Log4j2; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.KnnFieldVectorsWriter; +import org.apache.lucene.codecs.KnnVectorsReader; +import org.apache.lucene.codecs.KnnVectorsWriter; +import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; +import org.apache.lucene.index.*; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.*; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.RamUsageEstimator; +import org.opensearch.knn.plugin.stats.KNNCounter; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.time.Clock; +import java.util.*; +import java.util.concurrent.ForkJoinPool; +import java.util.function.Function; +import java.util.stream.IntStream; + +import static io.github.jbellis.jvector.quantization.KMeansPlusPlusClusterer.UNWEIGHTED; +import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding; +import static org.opensearch.knn.index.codec.jvector.JVectorFormat.SIMD_POOL_FLUSH; +import static org.opensearch.knn.index.codec.jvector.JVectorFormat.SIMD_POOL_MERGE; + +/** + * JVectorWriter is responsible for writing vector data into index segments using the JVector library. + * + *

+ * <p><b>Persisting the JVector Graph Index</b></p>
+ *
+ * Flushing data into disk segments occurs in two scenarios:
+ * <ol>
+ *   <li>When the segment is being flushed to disk (e.g., when a new segment is created) via {@link #flush(int, Sorter.DocMap)}</li>
+ *   <li>When the segment is a result of a merge (e.g., when multiple segments are merged into one) via {@link #mergeOneField(FieldInfo, MergeState)}</li>
+ * </ol>
+ *
+ * <p><b>jVector Graph Ordinal to Lucene Document ID Mapping</b></p>
+ *
+ * JVector keeps its own ordinals to identify its nodes. Those ordinals can be different from the Lucene document IDs. + * Document IDs in Lucene can change after a merge operation. Therefore, we need to maintain a mapping between + * JVector ordinals and Lucene document IDs that can hold across merges. + *

+ * Document IDs in Lucene are mapped across merges and sorts using the {@link org.apache.lucene.index.MergeState.DocMap} for merges and {@link org.apache.lucene.index.Sorter.DocMap} for flush/sorts. + * For jVector however, we don't want to modify the ordinals in the jVector graph, and therefore we need to maintain a mapping between the jVector ordinals and the new Lucene document IDs. + * This is achieved by keeping checkpoints of the {@link GraphNodeIdToDocMap} class in the index metadata and allowing us to update the mapping as needed across merges by constructing a new mapping from the previous mapping and the {@link MergeState.DocMap} provided in the {@link MergeState}. + * And across sorts with {@link GraphNodeIdToDocMap#update(Sorter.DocMap)} during flushes. + *
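+ * <p>
+ * For illustration only (hypothetical IDs): a vector stored as graph node 3 keeps ordinal 3 across merges; if the merge
+ * doc map moves its document from Lucene doc ID 7 to doc ID 42, only the checkpointed {@link GraphNodeIdToDocMap} changes,
+ * so that {@code getLuceneDocId(3)} returns 42 in the merged segment.
+ * </p>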

+ * + */ +@Log4j2 +public class JVectorWriter extends KnnVectorsWriter { + private static final long SHALLOW_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(JVectorWriter.class); + + private final List> fields = new ArrayList<>(); + + private final IndexOutput meta; + private final IndexOutput vectorIndex; + private final String indexDataFileName; + private final String baseDataFileName; + private final SegmentWriteState segmentWriteState; + private final int maxConn; + private final int beamWidth; + private final float degreeOverflow; + private final float alpha; + private final Function numberOfSubspacesPerVectorSupplier; // Number of subspaces used per vector for PQ quantization + // as a function of the original dimension + private final int minimumBatchSizeForQuantization; // Threshold for the vector count above which we will trigger PQ quantization + private final boolean hierarchyEnabled; + + private boolean finished = false; + + public JVectorWriter( + SegmentWriteState segmentWriteState, + int maxConn, + int beamWidth, + float degreeOverflow, + float alpha, + Function numberOfSubspacesPerVectorSupplier, + int minimumBatchSizeForQuantization, + boolean hierarchyEnabled + ) throws IOException { + this.segmentWriteState = segmentWriteState; + this.maxConn = maxConn; + this.beamWidth = beamWidth; + this.degreeOverflow = degreeOverflow; + this.alpha = alpha; + this.numberOfSubspacesPerVectorSupplier = numberOfSubspacesPerVectorSupplier; + this.minimumBatchSizeForQuantization = minimumBatchSizeForQuantization; + this.hierarchyEnabled = hierarchyEnabled; + String metaFileName = IndexFileNames.segmentFileName( + segmentWriteState.segmentInfo.name, + segmentWriteState.segmentSuffix, + JVectorFormat.META_EXTENSION + ); + + this.indexDataFileName = IndexFileNames.segmentFileName( + segmentWriteState.segmentInfo.name, + segmentWriteState.segmentSuffix, + JVectorFormat.VECTOR_INDEX_EXTENSION + ); + this.baseDataFileName = segmentWriteState.segmentInfo.name + "_" + segmentWriteState.segmentSuffix; + + boolean success = false; + try { + meta = segmentWriteState.directory.createOutput(metaFileName, segmentWriteState.context); + vectorIndex = segmentWriteState.directory.createOutput(indexDataFileName, segmentWriteState.context); + CodecUtil.writeIndexHeader( + meta, + JVectorFormat.META_CODEC_NAME, + JVectorFormat.VERSION_CURRENT, + segmentWriteState.segmentInfo.getId(), + segmentWriteState.segmentSuffix + ); + + CodecUtil.writeIndexHeader( + vectorIndex, + JVectorFormat.VECTOR_INDEX_CODEC_NAME, + JVectorFormat.VERSION_CURRENT, + segmentWriteState.segmentInfo.getId(), + segmentWriteState.segmentSuffix + ); + + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(this); + } + } + } + + @Override + public KnnFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOException { + log.info("Adding field {} in segment {}", fieldInfo.name, segmentWriteState.segmentInfo.name); + if (fieldInfo.getVectorEncoding() == VectorEncoding.BYTE) { + final String errorMessage = "byte[] vectors are not supported in JVector. " + + "Instead you should only use float vectors and leverage product quantization during indexing." 
+ + "This can provides much greater savings in storage and memory"; + log.error(errorMessage); + throw new UnsupportedOperationException(errorMessage); + } + FieldWriter newField = new FieldWriter<>(fieldInfo, segmentWriteState.segmentInfo.name); + + fields.add(newField); + return newField; + } + + @Override + public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { + log.info("Merging field {} into segment {}", fieldInfo.name, segmentWriteState.segmentInfo.name); + try { + final long mergeStart = Clock.systemDefaultZone().millis(); + switch (fieldInfo.getVectorEncoding()) { + case BYTE: + throw new UnsupportedEncodingException("Byte vectors are not supported in JVector."); + case FLOAT32: + final var mergeRavv = new RandomAccessMergedFloatVectorValues(fieldInfo, mergeState); + mergeRavv.merge(); + break; + } + final long mergeEnd = Clock.systemDefaultZone().millis(); + final long mergeTime = mergeEnd - mergeStart; + KNNCounter.KNN_GRAPH_MERGE_TIME.add(mergeTime); + log.info("Completed Merge field {} into segment {}", fieldInfo.name, segmentWriteState.segmentInfo.name); + } catch (Exception e) { + log.error("Error merging field {} into segment {}", fieldInfo.name, segmentWriteState.segmentInfo.name, e); + throw e; + } + } + + @Override + public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { + log.info("Flushing {} fields", fields.size()); + + log.info("Flushing jVector graph index"); + for (FieldWriter field : fields) { + final RandomAccessVectorValues randomAccessVectorValues = field.randomAccessVectorValues; + final int[] newToOldOrds = new int[randomAccessVectorValues.size()]; + for (int ord = 0; ord < randomAccessVectorValues.size(); ord++) { + newToOldOrds[ord] = ord; + } + final BuildScoreProvider buildScoreProvider; + final PQVectors pqVectors; + final FieldInfo fieldInfo = field.fieldInfo; + if (randomAccessVectorValues.size() >= minimumBatchSizeForQuantization) { + log.info("Calculating codebooks and compressed vectors for field {}", fieldInfo.name); + pqVectors = getPQVectors(newToOldOrds, randomAccessVectorValues, fieldInfo); + buildScoreProvider = BuildScoreProvider.pqBuildScoreProvider(getVectorSimilarityFunction(fieldInfo), pqVectors); + } else { + log.info( + "Vector count: {}, less than limit to trigger PQ quantization: {}, for field {}, will use full precision vectors instead.", + randomAccessVectorValues.size(), + minimumBatchSizeForQuantization, + fieldInfo.name + ); + pqVectors = null; + buildScoreProvider = BuildScoreProvider.randomAccessScoreProvider( + randomAccessVectorValues, + getVectorSimilarityFunction(fieldInfo) + ); + } + + // Generate the ord to doc mapping + final int[] ordinalsToDocIds = new int[randomAccessVectorValues.size()]; + for (int ord = 0; ord < randomAccessVectorValues.size(); ord++) { + ordinalsToDocIds[ord] = field.docIds.get(ord); + } + final GraphNodeIdToDocMap graphNodeIdToDocMap = new GraphNodeIdToDocMap(ordinalsToDocIds); + if (sortMap != null) { + graphNodeIdToDocMap.update(sortMap); + } + + OnHeapGraphIndex graph = getGraph( + buildScoreProvider, + randomAccessVectorValues, + newToOldOrds, + fieldInfo, + segmentWriteState.segmentInfo.name, + SIMD_POOL_FLUSH + ); + writeField(field.fieldInfo, field.randomAccessVectorValues, pqVectors, newToOldOrds, graphNodeIdToDocMap, graph); + + } + } + + private void writeField( + FieldInfo fieldInfo, + RandomAccessVectorValues randomAccessVectorValues, + PQVectors pqVectors, + int[] newToOldOrds, + GraphNodeIdToDocMap graphNodeIdToDocMap, + 
OnHeapGraphIndex graph + ) throws IOException { + log.info( + "Writing field {} with vector count: {}, for segment: {}", + fieldInfo.name, + randomAccessVectorValues.size(), + segmentWriteState.segmentInfo.name + ); + final var vectorIndexFieldMetadata = writeGraph( + graph, + randomAccessVectorValues, + fieldInfo, + pqVectors, + newToOldOrds, + graphNodeIdToDocMap + ); + meta.writeInt(fieldInfo.number); + vectorIndexFieldMetadata.toOutput(meta); + + log.info("Writing neighbors score cache for field {}", fieldInfo.name); + // field data file, which contains the graph + final String neighborsScoreCacheIndexFieldFileName = baseDataFileName + + "_" + + fieldInfo.name + + "." + + JVectorFormat.NEIGHBORS_SCORE_CACHE_EXTENSION; + try ( + IndexOutput indexOutput = segmentWriteState.directory.createOutput( + neighborsScoreCacheIndexFieldFileName, + segmentWriteState.context + ); + final var jVectorIndexWriter = new JVectorIndexWriter(indexOutput) + ) { + CodecUtil.writeIndexHeader( + indexOutput, + JVectorFormat.NEIGHBORS_SCORE_CACHE_CODEC_NAME, + JVectorFormat.VERSION_CURRENT, + segmentWriteState.segmentInfo.getId(), + segmentWriteState.segmentSuffix + ); + graph.save(jVectorIndexWriter); + CodecUtil.writeFooter(indexOutput); + } + } + + /** + * Writes the graph and PQ codebooks and compressed vectors to the vector index file + * @param graph graph + * @param randomAccessVectorValues random access vector values + * @param fieldInfo field info + * @return Tuple of start offset and length of the graph + * @throws IOException IOException + */ + private VectorIndexFieldMetadata writeGraph( + OnHeapGraphIndex graph, + RandomAccessVectorValues randomAccessVectorValues, + FieldInfo fieldInfo, + PQVectors pqVectors, + int[] newToOldOrds, + GraphNodeIdToDocMap graphNodeIdToDocMap + ) throws IOException { + // field data file, which contains the graph + final String vectorIndexFieldFileName = baseDataFileName + "_" + fieldInfo.name + "." 
+ JVectorFormat.VECTOR_INDEX_EXTENSION; + + try ( + IndexOutput indexOutput = segmentWriteState.directory.createOutput(vectorIndexFieldFileName, segmentWriteState.context); + final var jVectorIndexWriter = new JVectorIndexWriter(indexOutput) + ) { + // Header for the field data file + CodecUtil.writeIndexHeader( + indexOutput, + JVectorFormat.VECTOR_INDEX_CODEC_NAME, + JVectorFormat.VERSION_CURRENT, + segmentWriteState.segmentInfo.getId(), + segmentWriteState.segmentSuffix + ); + final long startOffset = indexOutput.getFilePointer(); + + log.info("Writing graph to {}", vectorIndexFieldFileName); + var resultBuilder = VectorIndexFieldMetadata.builder() + .fieldNumber(fieldInfo.number) + .vectorEncoding(fieldInfo.getVectorEncoding()) + .vectorSimilarityFunction(fieldInfo.getVectorSimilarityFunction()) + .vectorDimension(randomAccessVectorValues.dimension()) + .graphNodeIdToDocMap(graphNodeIdToDocMap); + + try ( + var writer = new OnDiskSequentialGraphIndexWriter.Builder(graph, jVectorIndexWriter).with( + new InlineVectors(randomAccessVectorValues.dimension()) + ).build() + ) { + var suppliers = Feature.singleStateFactory( + FeatureId.INLINE_VECTORS, + nodeId -> new InlineVectors.State(randomAccessVectorValues.getVector(newToOldOrds[nodeId])) + ); + writer.write(suppliers); + long endGraphOffset = jVectorIndexWriter.position(); + resultBuilder.vectorIndexOffset(startOffset); + resultBuilder.vectorIndexLength(endGraphOffset - startOffset); + + // If PQ is enabled and we have enough vectors, write the PQ codebooks and compressed vectors + if (pqVectors != null) { + log.info( + "Writing PQ codebooks and vectors for field {} since the size is {} >= {}", + fieldInfo.name, + randomAccessVectorValues.size(), + minimumBatchSizeForQuantization + ); + resultBuilder.pqCodebooksAndVectorsOffset(endGraphOffset); + // write the compressed vectors and codebooks to disk + pqVectors.write(jVectorIndexWriter); + resultBuilder.pqCodebooksAndVectorsLength(jVectorIndexWriter.position() - endGraphOffset); + } else { + resultBuilder.pqCodebooksAndVectorsOffset(0); + resultBuilder.pqCodebooksAndVectorsLength(0); + } + CodecUtil.writeFooter(indexOutput); + } + + return resultBuilder.build(); + } + } + + private PQVectors getPQVectors(int[] newToOldOrds, RandomAccessVectorValues randomAccessVectorValues, FieldInfo fieldInfo) + throws IOException { + final String fieldName = fieldInfo.name; + final VectorSimilarityFunction vectorSimilarityFunction = fieldInfo.getVectorSimilarityFunction(); + log.info("Computing PQ codebooks for field {} for {} vectors", fieldName, randomAccessVectorValues.size()); + final long start = Clock.systemDefaultZone().millis(); + final var M = numberOfSubspacesPerVectorSupplier.apply(randomAccessVectorValues.dimension()); + final var numberOfClustersPerSubspace = Math.min(256, randomAccessVectorValues.size()); // number of centroids per + // subspace + ProductQuantization pq = ProductQuantization.compute( + randomAccessVectorValues, + M, // number of subspaces + numberOfClustersPerSubspace, // number of centroids per subspace + vectorSimilarityFunction == VectorSimilarityFunction.EUCLIDEAN, // center the dataset + UNWEIGHTED, + SIMD_POOL_MERGE, + ForkJoinPool.commonPool() + ); + + final long end = Clock.systemDefaultZone().millis(); + final long trainingTime = end - start; + log.info("Computed PQ codebooks for field {}, in {} millis", fieldName, trainingTime); + KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.add(trainingTime); + log.info("Encoding and building PQ vectors for field {} for {} 
vectors", fieldName, randomAccessVectorValues.size()); + // PQVectors pqVectors = pq.encodeAll(randomAccessVectorValues, SIMD_POOL); + PQVectors pqVectors = PQVectors.encodeAndBuild(pq, newToOldOrds.length, newToOldOrds, randomAccessVectorValues, SIMD_POOL_MERGE); + log.info( + "Encoded and built PQ vectors for field {}, original size: {} bytes, compressed size: {} bytes", + fieldName, + pqVectors.getOriginalSize(), + pqVectors.getCompressedSize() + ); + return pqVectors; + } + + @Value + @Builder(toBuilder = true) + @AllArgsConstructor + public static class VectorIndexFieldMetadata { + int fieldNumber; + VectorEncoding vectorEncoding; + VectorSimilarityFunction vectorSimilarityFunction; + int vectorDimension; + long vectorIndexOffset; + long vectorIndexLength; + long pqCodebooksAndVectorsOffset; + long pqCodebooksAndVectorsLength; + float degreeOverflow; // important when leveraging cache + GraphNodeIdToDocMap graphNodeIdToDocMap; + + public void toOutput(IndexOutput out) throws IOException { + out.writeInt(fieldNumber); + out.writeInt(vectorEncoding.ordinal()); + out.writeInt(JVectorReader.VectorSimilarityMapper.distFuncToOrd(vectorSimilarityFunction)); + out.writeVInt(vectorDimension); + out.writeVLong(vectorIndexOffset); + out.writeVLong(vectorIndexLength); + out.writeVLong(pqCodebooksAndVectorsOffset); + out.writeVLong(pqCodebooksAndVectorsLength); + out.writeInt(Float.floatToIntBits(degreeOverflow)); + graphNodeIdToDocMap.toOutput(out); + } + + public VectorIndexFieldMetadata(IndexInput in) throws IOException { + this.fieldNumber = in.readInt(); + this.vectorEncoding = readVectorEncoding(in); + this.vectorSimilarityFunction = JVectorReader.VectorSimilarityMapper.ordToLuceneDistFunc(in.readInt()); + this.vectorDimension = in.readVInt(); + this.vectorIndexOffset = in.readVLong(); + this.vectorIndexLength = in.readVLong(); + this.pqCodebooksAndVectorsOffset = in.readVLong(); + this.pqCodebooksAndVectorsLength = in.readVLong(); + this.degreeOverflow = Float.intBitsToFloat(in.readInt()); + this.graphNodeIdToDocMap = new GraphNodeIdToDocMap(in); + } + + } + + @Override + public void finish() throws IOException { + log.info("Finishing segment {}", segmentWriteState.segmentInfo.name); + if (finished) { + throw new IllegalStateException("already finished"); + } + finished = true; + + if (meta != null) { + // write end of fields marker + meta.writeInt(-1); + CodecUtil.writeFooter(meta); + } + + if (vectorIndex != null) { + CodecUtil.writeFooter(vectorIndex); + } + + } + + @Override + public void close() throws IOException { + IOUtils.close(meta, vectorIndex); + } + + @Override + public long ramBytesUsed() { + long total = SHALLOW_RAM_BYTES_USED; + for (FieldWriter field : fields) { + // the field tracks the delegate field usage + total += field.ramBytesUsed(); + } + return total; + } + + /** + * The FieldWriter class is responsible for writing vector field data into index segments. + * It provides functionality to process vector values as those being added, manage memory usage, and build HNSW graph + * indexing structures for efficient retrieval during search queries. + * + * @param The type of vector value to be handled by the writer. + * This is often specialized to support specific implementations, such as float[] or byte[] vectors. 
+ */ + static class FieldWriter extends KnnFieldVectorsWriter { + private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport(); + private final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(FieldWriter.class); + @Getter + private final FieldInfo fieldInfo; + private int lastDocID = -1; + private final String segmentName; + private final RandomAccessVectorValues randomAccessVectorValues; + // The ordering of docIds matches the ordering of vectors, the index in this list corresponds to the jVector ordinal + private final List> vectors = new ArrayList<>(); + private final List docIds = new ArrayList<>(); + + FieldWriter(FieldInfo fieldInfo, String segmentName) { + /** + * For creating a new field from a flat field vectors writer. + */ + this.randomAccessVectorValues = new ListRandomAccessVectorValues(vectors, fieldInfo.getVectorDimension()); + this.fieldInfo = fieldInfo; + this.segmentName = segmentName; + } + + @Override + public void addValue(int docID, T vectorValue) throws IOException { + log.trace("Adding value {} to field {} in segment {}", vectorValue, fieldInfo.name, segmentName); + if (docID == lastDocID) { + throw new IllegalArgumentException( + "VectorValuesField \"" + + fieldInfo.name + + "\" appears more than once in this document (only one value is allowed per field)" + ); + } + docIds.add(docID); + if (vectorValue instanceof float[]) { + vectors.add(VECTOR_TYPE_SUPPORT.createFloatVector(vectorValue)); + } else if (vectorValue instanceof byte[]) { + final String errorMessage = "byte[] vectors are not supported in JVector. " + + "Instead you should only use float vectors and leverage product quantization during indexing." + + "This can provides much greater savings in storage and memory"; + log.error("{}", errorMessage); + throw new UnsupportedOperationException(errorMessage); + } else { + throw new IllegalArgumentException("Unsupported vector type: " + vectorValue.getClass()); + } + + lastDocID = docID; + } + + @Override + public T copyValue(T vectorValue) { + throw new UnsupportedOperationException("copyValue not supported"); + } + + @Override + public long ramBytesUsed() { + return SHALLOW_SIZE + (long) vectors.size() * fieldInfo.getVectorDimension() * Float.BYTES; + } + + } + + static io.github.jbellis.jvector.vector.VectorSimilarityFunction getVectorSimilarityFunction(FieldInfo fieldInfo) { + log.info("Matching vector similarity function {} for field {}", fieldInfo.getVectorSimilarityFunction(), fieldInfo.name); + return switch (fieldInfo.getVectorSimilarityFunction()) { + case EUCLIDEAN -> io.github.jbellis.jvector.vector.VectorSimilarityFunction.EUCLIDEAN; + case COSINE -> io.github.jbellis.jvector.vector.VectorSimilarityFunction.COSINE; + case DOT_PRODUCT -> io.github.jbellis.jvector.vector.VectorSimilarityFunction.DOT_PRODUCT; + default -> throw new IllegalArgumentException("Unsupported similarity function: " + fieldInfo.getVectorSimilarityFunction()); + }; + } + + /** + * Implementation of RandomAccessVectorValues that directly uses the source + * FloatVectorValues from multiple segments without copying the vectors. + * + * Some details about the implementation logic: + * + * First, we identify the leading reader, which is the one with the most live vectors. + * Second, we build a mapping between the ravv ordinals and the reader index and the ordinal in that reader. + * Third, we build a mapping between the ravv ordinals and the global doc ids. 
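+     * <p>
+     * For illustration only (hypothetical sizes): merging two readers holding 100 and 40 vectors yields a unified ordinal
+     * space of 140 entries, and {@code ravvOrdToReaderMapping[ord]} resolves each unified ordinal back to its
+     * (reader index, reader ordinal) pair so the vector can be read directly from its source reader without copying.
+     * </p>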
+ * + * Very important to note that for the leading graph the node Ids need to correspond to their original ravv ordinals in the reader. + * This is because we are later going to expand that graph with new vectors from the other readers. + * While the new vectors can be assigned arbitrary node Ids, the leading graph needs to preserve its original node Ids and map them to the original ravv vector ordinals. + */ + class RandomAccessMergedFloatVectorValues implements RandomAccessVectorValues { + private static final int READER_ID = 0; + private static final int READER_ORD = 1; + private static final int LEADING_READER_IDX = 0; + + private final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport(); + + // Array of sub-readers + private final KnnVectorsReader[] readers; + private final JVectorFloatVectorValues[] perReaderFloatVectorValues; + + // Maps the ravv ordinals to the reader index and the ordinal in that reader. This is allowing us to get a unified view of all the + // vectors in all the readers with a single unified ordinal space. + private final int[][] ravvOrdToReaderMapping; + + // Total number of vectors + private final int size; + // Total number of documents including those without values + private final int totalDocsCount; + + // Vector dimension + private final int dimension; + private final FieldInfo fieldInfo; + private final MergeState mergeState; + private final GraphNodeIdToDocMap graphNodeIdToDocMap; + private final int[] graphNodeIdsToRavvOrds; + private boolean deletesFound = false; + + /** + * Creates a random access view over merged float vector values. + * + * @param fieldInfo Field info for the vector field + * @param mergeState Merge state containing readers and doc maps + */ + public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState mergeState) throws IOException { + this.totalDocsCount = Math.toIntExact(Arrays.stream(mergeState.maxDocs).asLongStream().sum()); + this.fieldInfo = fieldInfo; + this.mergeState = mergeState; + + final String fieldName = fieldInfo.name; + + // Count total vectors, collect readers and identify leading reader, collect base ordinals to later be used to build the mapping + // between global ordinals and global lucene doc ids + int totalVectorsCount = 0; + int totalLiveVectorsCount = 0; + int dimension = 0; + int tempLeadingReaderIdx = -1; + int vectorsCountInLeadingReader = -1; + List allReaders = new ArrayList<>(); + final MergeState.DocMap[] docMaps = mergeState.docMaps.clone(); + final Bits[] liveDocs = mergeState.liveDocs.clone(); + final int[] baseOrds = new int[mergeState.knnVectorsReaders.length]; + final int[] deletedOrds = new int[mergeState.knnVectorsReaders.length]; // counts the number of deleted documents in each reader + // that previously had a vector + + // Find the leading reader, count the total number of live vectors, and the base ordinals for each reader + for (int i = 0; i < mergeState.knnVectorsReaders.length; i++) { + FieldInfos fieldInfos = mergeState.fieldInfos[i]; + baseOrds[i] = totalVectorsCount; + if (MergedVectorValues.hasVectorValues(fieldInfos, fieldName)) { + KnnVectorsReader reader = mergeState.knnVectorsReaders[i]; + if (reader != null) { + FloatVectorValues values = reader.getFloatVectorValues(fieldName); + if (values != null) { + allReaders.add(reader); + int vectorCountInReader = values.size(); + int liveVectorCountInReader = 0; + KnnVectorValues.DocIndexIterator it = values.iterator(); + while (it.nextDoc() != 
DocIdSetIterator.NO_MORE_DOCS) { + if (liveDocs[i] == null || liveDocs[i].get(it.docID())) { + liveVectorCountInReader++; + } else { + deletedOrds[i]++; + deletesFound = true; + } + } + if (liveVectorCountInReader >= vectorsCountInLeadingReader) { + vectorsCountInLeadingReader = liveVectorCountInReader; + tempLeadingReaderIdx = i; + } + totalVectorsCount += vectorCountInReader; + totalLiveVectorsCount += liveVectorCountInReader; + dimension = Math.max(dimension, values.dimension()); + } + } + } + } + + assert (totalVectorsCount <= totalDocsCount) : "Total number of vectors exceeds the total number of documents"; + assert (totalLiveVectorsCount <= totalVectorsCount) : "Total number of live vectors exceeds the total number of vectors"; + assert (dimension > 0) : "No vectors found for field " + fieldName; + + this.size = totalVectorsCount; + this.readers = new KnnVectorsReader[allReaders.size()]; + for (int i = 0; i < readers.length; i++) { + readers[i] = allReaders.get(i); + } + + // always swap the leading reader to the first position + // For this part we need to make sure we also swap all the other metadata arrays that are indexed by reader index + // Such as readers, docMaps, liveDocs, baseOrds, deletedOrds + if (tempLeadingReaderIdx != 0) { + final KnnVectorsReader temp = readers[LEADING_READER_IDX]; + readers[LEADING_READER_IDX] = readers[tempLeadingReaderIdx]; + readers[tempLeadingReaderIdx] = temp; + // also swap the leading doc map to the first position to match the readers + final MergeState.DocMap tempDocMap = docMaps[LEADING_READER_IDX]; + docMaps[LEADING_READER_IDX] = docMaps[tempLeadingReaderIdx]; + docMaps[tempLeadingReaderIdx] = tempDocMap; + // swap base ords + final int tempBaseOrd = baseOrds[LEADING_READER_IDX]; + baseOrds[LEADING_READER_IDX] = baseOrds[tempLeadingReaderIdx]; + baseOrds[tempLeadingReaderIdx] = tempBaseOrd; + } + + this.perReaderFloatVectorValues = new JVectorFloatVectorValues[readers.length]; + this.dimension = dimension; + + // Build mapping from global ordinal to [readerIndex, readerOrd] + this.ravvOrdToReaderMapping = new int[totalDocsCount][2]; + + int documentsIterated = 0; + + // Will be used to build the new graphNodeIdToDocMap with the new graph node id to docId mapping. + // This mapping should not be used to access the vectors at any time during construction, but only after the merge is complete + // and the new segment is created and used by searchers. + final int[] graphNodeIdToDocIds = new int[totalLiveVectorsCount]; + this.graphNodeIdsToRavvOrds = new int[totalLiveVectorsCount]; + + int graphNodeId = 0; + if (deletesFound) { + // If there are deletes, we need to build a new graph from scratch and compact the graph node ids + // TODO: remove this logic once we support incremental graph building with deletes see + // https://github.com/opensearch-project/opensearch-jvector/issues/171 + for (int readerIdx = 0; readerIdx < readers.length; readerIdx++) { + final JVectorFloatVectorValues values = (JVectorFloatVectorValues) readers[readerIdx].getFloatVectorValues(fieldName); + perReaderFloatVectorValues[readerIdx] = values; + // For each vector in this reader + KnnVectorValues.DocIndexIterator it = values.iterator(); + + for (int docId = it.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = it.nextDoc()) { + if (docMaps[readerIdx].get(docId) == -1) { + log.warn( + "Document {} in reader {} is not mapped to a global ordinal from the merge docMaps. 
Will skip this document for now", + docId, + readerIdx + ); + } else { + // Mapping from ravv ordinals to [readerIndex, readerOrd] + // Map graph node id to ravv ordinal + // Map graph node id to doc id + final int newGlobalDocId = docMaps[readerIdx].get(docId); + final int ravvLocalOrd = it.index(); + final int ravvGlobalOrd = ravvLocalOrd + baseOrds[readerIdx]; + graphNodeIdToDocIds[graphNodeId] = newGlobalDocId; + graphNodeIdsToRavvOrds[graphNodeId] = ravvGlobalOrd; + graphNodeId++; + ravvOrdToReaderMapping[ravvGlobalOrd][READER_ID] = readerIdx; // Reader index + ravvOrdToReaderMapping[ravvGlobalOrd][READER_ORD] = ravvLocalOrd; // Ordinal in reader + } + + documentsIterated++; + } + } + } else { + // If there are no deletes, we can reuse the existing graph and simply remap the ravv ordinals to the new global doc ids + // for the leading reader we must preserve the original node Ids and map them to the corresponding ravv vectors originally + // used to build the graph + // This is necessary because we are later going to expand that graph with new vectors from the other readers. + // The leading reader is ALWAYS the first one in the readers array + final JVectorFloatVectorValues leadingReaderValues = (JVectorFloatVectorValues) readers[LEADING_READER_IDX] + .getFloatVectorValues(fieldName); + perReaderFloatVectorValues[LEADING_READER_IDX] = leadingReaderValues; + var leadingReaderIt = leadingReaderValues.iterator(); + for (int docId = leadingReaderIt.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = leadingReaderIt.nextDoc()) { + final int newGlobalDocId = docMaps[LEADING_READER_IDX].get(docId); + if (newGlobalDocId == -1) { + log.warn( + "Document {} in reader {} is not mapped to a global ordinal from the merge docMaps. Will skip this document for now", + docId, + LEADING_READER_IDX + ); + } else { + final int ravvLocalOrd = leadingReaderIt.index(); + final int ravvGlobalOrd = ravvLocalOrd + baseOrds[LEADING_READER_IDX]; + graphNodeIdToDocIds[ravvLocalOrd] = newGlobalDocId; + graphNodeIdsToRavvOrds[ravvLocalOrd] = ravvGlobalOrd; + graphNodeId++; + ravvOrdToReaderMapping[ravvGlobalOrd][READER_ID] = LEADING_READER_IDX; // Reader index + ravvOrdToReaderMapping[ravvGlobalOrd][READER_ORD] = ravvLocalOrd; // Ordinal in reader + } + + documentsIterated++; + } + + // For the remaining readers we map the graph node id to the ravv ordinal in the order they appear + for (int readerIdx = 1; readerIdx < readers.length; readerIdx++) { + final JVectorFloatVectorValues values = (JVectorFloatVectorValues) readers[readerIdx].getFloatVectorValues(fieldName); + perReaderFloatVectorValues[readerIdx] = values; + // For each vector in this reader + KnnVectorValues.DocIndexIterator it = values.iterator(); + + for (int docId = it.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = it.nextDoc()) { + if (docMaps[readerIdx].get(docId) == -1) { + log.warn( + "Document {} in reader {} is not mapped to a global ordinal from the merge docMaps. 
Will skip this document for now", + docId, + readerIdx + ); + } else { + // Mapping from ravv ordinals to [readerIndex, readerOrd] + // Map graph node id to ravv ordinal + // Map graph node id to doc id + final int newGlobalDocId = docMaps[readerIdx].get(docId); + final int ravvLocalOrd = it.index(); + final int ravvGlobalOrd = ravvLocalOrd + baseOrds[readerIdx]; + graphNodeIdToDocIds[graphNodeId] = newGlobalDocId; + graphNodeIdsToRavvOrds[graphNodeId] = ravvGlobalOrd; + graphNodeId++; + ravvOrdToReaderMapping[ravvGlobalOrd][READER_ID] = readerIdx; // Reader index + ravvOrdToReaderMapping[ravvGlobalOrd][READER_ORD] = ravvLocalOrd; // Ordinal in reader + } + + documentsIterated++; + } + } + } + + if (documentsIterated < totalVectorsCount) { + throw new IllegalStateException( + "More documents were expected than what was found in the readers." + + "Expected at least number of total vectors: " + + totalVectorsCount + + " but found only: " + + documentsIterated + + " documents." + ); + } + + this.graphNodeIdToDocMap = new GraphNodeIdToDocMap(graphNodeIdToDocIds); + log.debug("Created RandomAccessMergedFloatVectorValues with {} total vectors from {} readers", size, readers.length); + + } + + /** + * Merges the float vector values from multiple readers into a unified structure. + * This process includes handling product quantization (PQ) for vector compression, + * generating ord-to-doc mappings, and writing the merged index into a new segment file. + *

+ * The method determines if pre-existing product quantization codebooks are available + * from the leading reader. If available, it refines them using remaining vectors + * from other readers in the merge. If no pre-existing codebooks are found and + * the total vector count meets the required minimum threshold, new codebooks + * and compressed vectors are computed. Otherwise, no PQ compression is applied. + *

+ * Also, it generates a mapping of ordinals to document IDs by iterating through + * the provided vector data, which is further used to write the field data. + *
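+ * <p>
+ * For illustration only (hypothetical threshold): with no pre-existing codebooks, a merge of 2,000 vectors against a
+ * quantization threshold of 1,024 trains new codebooks, while a merge of only 500 vectors keeps full-precision scoring.
+ * </p>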

+ * In the event of no deletes or quantization, the graph construction is done by incrementally adding vectors from smaller segments into the largest segment. + * For all other cases, we build a new graph from scratch from all the vectors. + * + * TODO: Add support for incremental graph building with quantization see issue + * + * @throws IOException if there is an issue during reading or writing vector data. + */ + public void merge() throws IOException { + // This section creates the PQVectors to be used for this merge + // Get PQ compressor for leading reader + final int totalVectorsCount = size; + final String fieldName = fieldInfo.name; + final PQVectors pqVectors; + final OnHeapGraphIndex graph; + // Get the leading reader + PerFieldKnnVectorsFormat.FieldsReader fieldsReader = (PerFieldKnnVectorsFormat.FieldsReader) readers[LEADING_READER_IDX]; + JVectorReader leadingReader = (JVectorReader) fieldsReader.getFieldReader(fieldName); + final BuildScoreProvider buildScoreProvider; + // Check if the leading reader has pre-existing PQ codebooks and if so, refine them with the remaining vectors + if (leadingReader.getProductQuantizationForField(fieldInfo.name).isEmpty()) { + // No pre-existing codebooks, check if we have enough vectors to trigger quantization + log.info( + "No Pre-existing PQ codebooks found in this merge for field {} in segment {}, will check if a new codebooks is necessary", + fieldName, + mergeState.segmentInfo.name + ); + if (this.size() >= minimumBatchSizeForQuantization) { + log.info( + "Calculating new codebooks and compressed vectors for field: {}, with totalVectorCount: {}, above minimumBatchSizeForQuantization: {}", + fieldName, + totalVectorsCount, + minimumBatchSizeForQuantization + ); + pqVectors = getPQVectors(graphNodeIdsToRavvOrds, this, fieldInfo); + } else { + log.info( + "Not enough vectors found for field: {}, totalVectorCount: {}, is below minimumBatchSizeForQuantization: {}", + fieldName, + totalVectorsCount, + minimumBatchSizeForQuantization + ); + pqVectors = null; + } + } else { + log.info( + "Pre-existing PQ codebooks found in this merge for field {} in segment {}, will refine the codebooks from the leading reader with the remaining vectors", + fieldName, + mergeState.segmentInfo.name + ); + final long start = Clock.systemDefaultZone().millis(); + ProductQuantization leadingCompressor = leadingReader.getProductQuantizationForField(fieldName).get(); + // Refine the leadingCompressor with the remaining vectors in the merge, we skip the leading reader since it's already been + // used to create the leadingCompressor + // We assume the leading reader is ALWAYS the first one in the readers array + for (int i = LEADING_READER_IDX + 1; i < readers.length; i++) { + final FloatVectorValues values = readers[i].getFloatVectorValues(fieldName); + final RandomAccessVectorValues randomAccessVectorValues = new RandomAccessVectorValuesOverVectorValues(values); + leadingCompressor.refine(randomAccessVectorValues); + } + final long end = Clock.systemDefaultZone().millis(); + final long trainingTime = end - start; + log.info("Refined PQ codebooks for field {}, in {} millis", fieldName, trainingTime); + KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.add(trainingTime); + pqVectors = PQVectors.encodeAndBuild( + leadingCompressor, + graphNodeIdsToRavvOrds.length, + graphNodeIdsToRavvOrds, + this, + SIMD_POOL_MERGE + ); + } + + if (pqVectors == null) { + buildScoreProvider = BuildScoreProvider.randomAccessScoreProvider( + this, + graphNodeIdsToRavvOrds, + 
getVectorSimilarityFunction(fieldInfo) + ); + // graph = getGraph(buildScoreProvider, this, newToOldOrds, fieldInfo, segmentWriteState.segmentInfo.name); + if (!deletesFound) { + final String segmentName = segmentWriteState.segmentInfo.name; + log.info( + "No deletes found, and no PQ codebooks found, expanding previous graph with additional vectors for field {} in segment {}", + fieldName, + segmentName + ); + final RandomAccessReader leadingOnHeapGraphReader = leadingReader.getNeighborsScoreCacheForField(fieldName); + final int numBaseVectors = leadingReader.getFloatVectorValues(fieldName).size(); + graph = (OnHeapGraphIndex) GraphIndexBuilder.buildAndMergeNewNodes( + leadingOnHeapGraphReader, + this, + buildScoreProvider, + numBaseVectors, + graphNodeIdsToRavvOrds, + beamWidth, + degreeOverflow, + alpha, + hierarchyEnabled + ); + } else { + log.info("Deletes found, and no PQ codebooks found, building new graph from scratch"); + graph = getGraph( + buildScoreProvider, + this, + graphNodeIdsToRavvOrds, + fieldInfo, + segmentWriteState.segmentInfo.name, + SIMD_POOL_MERGE + ); + } + } else { + log.info("PQ codebooks found, building graph from scratch with PQ vectors"); + buildScoreProvider = BuildScoreProvider.pqBuildScoreProvider(getVectorSimilarityFunction(fieldInfo), pqVectors); + // Pre-init the diversity provider here to avoid doing it lazily (as it could block the SIMD threads) + buildScoreProvider.diversityProviderFor(0); + graph = getGraph( + buildScoreProvider, + this, + graphNodeIdsToRavvOrds, + fieldInfo, + segmentWriteState.segmentInfo.name, + SIMD_POOL_MERGE + ); + } + + writeField(fieldInfo, this, pqVectors, graphNodeIdsToRavvOrds, graphNodeIdToDocMap, graph); + } + + @Override + public int size() { + return size; + } + + @Override + public int dimension() { + return dimension; + } + + @Override + public VectorFloat getVector(int ord) { + if (ord < 0 || ord >= totalDocsCount) { + throw new IllegalArgumentException("Ordinal out of bounds: " + ord); + } + + final int readerIdx = ravvOrdToReaderMapping[ord][READER_ID]; + final int readerOrd = ravvOrdToReaderMapping[ord][READER_ORD]; + + // Access to float values is not thread safe + synchronized (perReaderFloatVectorValues[readerIdx]) { + return perReaderFloatVectorValues[readerIdx].vectorFloatValue(readerOrd); + } + } + + @Override + public boolean isValueShared() { + return false; + } + + @Override + public RandomAccessVectorValues copy() { + throw new UnsupportedOperationException("Copy not supported"); + } + } + + /** + * This method will return the graph index for the field + * @return OnHeapGraphIndex + */ + public OnHeapGraphIndex getGraph( + BuildScoreProvider buildScoreProvider, + RandomAccessVectorValues randomAccessVectorValues, + int[] newToOldOrds, + FieldInfo fieldInfo, + String segmentName, + ForkJoinPool SIMD_POOL + ) { + final GraphIndexBuilder graphIndexBuilder = new GraphIndexBuilder( + buildScoreProvider, + fieldInfo.getVectorDimension(), + maxConn, + beamWidth, + degreeOverflow, + alpha, + hierarchyEnabled + ); + + /* + * We cannot always use randomAccessVectorValues for the graph building + * because it's size will not always correspond to the document count. + * To have the right mapping from docId to vector ordinal we need to use the mergedFloatVector. + * This is the case when we are merging segments and we might have more documents than vectors. 
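+ * For illustration only (hypothetical counts): a merge may carry 1000 documents of which only 800 have vectors;
+ * the graph is then built over 800 node ids and newToOldOrds redirects each graph ordinal to its vector ordinal.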
+ */ + final long start = Clock.systemDefaultZone().millis(); + final OnHeapGraphIndex graphIndex; + var vv = randomAccessVectorValues.threadLocalSupplier(); + + log.info("Building graph from merged float vector"); + // parallel graph construction from the merge documents Ids + SIMD_POOL.submit(() -> IntStream.range(0, newToOldOrds.length).parallel().forEach(ord -> { + graphIndexBuilder.addGraphNode(ord, vv.get().getVector(newToOldOrds[ord])); + })).join(); + graphIndexBuilder.cleanup(); + graphIndex = (OnHeapGraphIndex) graphIndexBuilder.getGraph(); + final long end = Clock.systemDefaultZone().millis(); + + log.info("Built graph for field {} in segment {} in {} millis", fieldInfo.name, segmentName, end - start); + return graphIndex; + } + + static class RandomAccessVectorValuesOverVectorValues implements RandomAccessVectorValues { + private final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport(); + private final FloatVectorValues values; + + public RandomAccessVectorValuesOverVectorValues(FloatVectorValues values) { + this.values = values; + } + + @Override + public int size() { + return values.size(); + } + + @Override + public int dimension() { + return values.dimension(); + } + + @Override + public VectorFloat getVector(int nodeId) { + try { + // Access to float values is not thread safe + synchronized (this) { + final float[] vector = values.vectorValue(nodeId); + final float[] copy = new float[vector.length]; + System.arraycopy(vector, 0, copy, 0, vector.length); + return VECTOR_TYPE_SUPPORT.createFloatVector(copy); + } + } catch (IOException e) { + log.error("Error retrieving vector at ordinal {}", nodeId, e); + throw new RuntimeException(e); + } + } + + @Override + public boolean isValueShared() { + return false; + } + + @Override + public RandomAccessVectorValues copy() { + throw new UnsupportedOperationException("Copy not supported"); + } + } + +} diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/package-info.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/package-info.java new file mode 100644 index 000000000000..5f05b040c88a --- /dev/null +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/package-info.java @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/** + * This package contains the implementation of the JVector codec, a Lucene codec for approximate + * nearest neighbor search using vector quantization and HNSW graph indexing. It is based on the + * OpenSearch JVector codec and optimized for Lucene. 
+ */ +package org.apache.lucene.sandbox.codecs.jvector; diff --git a/lucene/sandbox/src/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat b/lucene/sandbox/src/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat index 29a44d2ecfa8..84f11e50fd0a 100644 --- a/lucene/sandbox/src/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat +++ b/lucene/sandbox/src/resources/META-INF/services/org.apache.lucene.codecs.KnnVectorsFormat @@ -14,3 +14,4 @@ # limitations under the License. org.apache.lucene.sandbox.codecs.faiss.FaissKnnVectorsFormat +org.apache.lucene.sandbox.codecs.jvector.JVectorFormat diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java new file mode 100644 index 000000000000..899663214405 --- /dev/null +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java @@ -0,0 +1,1557 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.knn.index.codec.jvector; + +import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; +import lombok.extern.log4j.Log4j2; +import org.apache.lucene.document.*; +import org.apache.lucene.index.*; +import org.apache.lucene.search.*; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.tests.index.RandomIndexWriter; +import org.apache.lucene.tests.util.LuceneTestCase; +import org.junit.Assert; +import org.junit.Test; +import org.opensearch.knn.TestUtils; +import org.opensearch.knn.common.KNNConstants; +import org.opensearch.knn.index.ThreadLeakFiltersForTests; +import org.opensearch.knn.plugin.stats.KNNCounter; + +import java.io.IOException; +import java.nio.file.Path; +import java.util.*; +import java.util.concurrent.*; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; + +import static org.opensearch.knn.common.KNNConstants.DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; +import static org.opensearch.knn.index.engine.CommonTestUtils.getCodec; + +/** + * Test used specifically for JVector + */ +// Currently {@link IndexGraphBuilder} is using the default ForkJoinPool.commonPool() which is not being shutdown. +// Ignore thread leaks until we remove the ForkJoinPool.commonPool() usage from IndexGraphBuilder +// TODO: Wire the execution thread pool to {@link IndexGraphBuilder} to avoid the failure of the UT due to leaked thread pool warning. +@ThreadLeakFilters(defaultFilters = true, filters = { ThreadLeakFiltersForTests.class }) +@LuceneTestCase.SuppressSysoutChecks(bugUrl = "") +@Log4j2 +public class KNNJVectorTests extends LuceneTestCase { + private static final String TEST_FIELD = "test_field"; + private static final String TEST_ID_FIELD = "id"; + + /** + * Test to verify that the JVector codec is able to successfully search for the nearest neighbours + * in the index. + * Single field is used to store the vectors. + * All the documents are stored in a single segment. + * Single commit without refreshing the index. + * No merge. 
+ */ + @Test + public void testJVectorKnnIndex_simpleCase() throws IOException { + int k = 3; // The number of nearest neighbors to gather + int totalNumberOfDocs = 10; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec()); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = new float[] { 0.0f, 0.0f }; + for (int i = 1; i < totalNumberOfDocs + 1; i++) { + final float[] source = new float[] { 0.0f, 1.0f / i }; + final Document doc = new Document(); + doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); + w.addDocument(doc); + } + log.info("Flushing docs to make them discoverable on the file system"); + w.commit(); + + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have a single segment with 10 documents"); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + assertEquals(9, topDocs.scoreDocs[0].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 10.0f }), + topDocs.scoreDocs[0].score, + 0.001f + ); + assertEquals(8, topDocs.scoreDocs[1].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 9.0f }), + topDocs.scoreDocs[1].score, + 0.001f + ); + assertEquals(7, topDocs.scoreDocs[2].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 8.0f }), + topDocs.scoreDocs[2].score, + 0.001f + ); + log.info("successfully completed search tests"); + } + } + log.info("successfully closed directory"); + } + + /** + * Test the scenario when not all documents are populated with the vector field + */ + public void testMissing_fields() throws IOException { + final int k = 3; // The number of nearest neighbors to gather + final int totalNumberOfDocs = 10; + final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec()); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = new float[] { 0.0f, 0.0f }; + for (int i = 0; i < totalNumberOfDocs; i++) { + final Document doc = new Document(); + if (i % 2 == 0) { + final float[] source = new float[] { 0.0f, i }; + doc.add(new KnnFloatVectorField("test_field", source, vectorSimilarityFunction)); + } + doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); + w.addDocument(doc); + } + log.info("Flushing docs to make them discoverable on the file system"); + w.commit(); 
+ + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have a single segment with 10 documents"); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + assertEquals(0, topDocs.scoreDocs[0].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 0.0f }), + topDocs.scoreDocs[0].score, + 0.001f + ); + assertEquals(2, topDocs.scoreDocs[1].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 2.0f }), + topDocs.scoreDocs[1].score, + 0.001f + ); + assertEquals(4, topDocs.scoreDocs[2].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 4.0f }), + topDocs.scoreDocs[2].score, + 0.001f + ); + log.info("successfully completed search tests"); + } + } + log.info("successfully closed directory"); + } + + /** + * Test the scenario when the index is sorted by a doc value + * We want to make sure the docIDs are correctly mapped to the jVector ordinals + * @throws IOException if an I/O error occurs + */ + public void test_sorted_index() throws IOException { + final int k = 3; // The number of nearest neighbors to gather + final int totalNumberOfDocs = 10; + final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; + final String sortFieldName = "sorted_field"; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec()); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + // Add index sorting configuration + indexWriterConfig.setIndexSort(new Sort(new SortField(sortFieldName, SortField.Type.INT, true))); // true = reverse order + + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = new float[] { 0.0f, 0.0f }; + for (int i = 0; i < totalNumberOfDocs; i++) { + final Document doc = new Document(); + final float[] source = new float[] { 0.0f, i }; + doc.add(new KnnFloatVectorField("test_field", source, vectorSimilarityFunction)); + doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); + // Add the sortable field + doc.add(new NumericDocValuesField(sortFieldName, i)); + w.addDocument(doc); + } + log.info("Flushing docs to make them discoverable on the file system"); + w.commit(); + + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have a single segment with 10 documents"); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + assertEquals(9, topDocs.scoreDocs[0].doc); + assertEquals(0, 
reader.storedFields().document(topDocs.scoreDocs[0].doc).getField(TEST_ID_FIELD).numericValue().intValue()); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 0.0f }), + topDocs.scoreDocs[0].score, + 0.001f + ); + assertEquals(8, topDocs.scoreDocs[1].doc); + assertEquals(1, reader.storedFields().document(topDocs.scoreDocs[1].doc).getField(TEST_ID_FIELD).numericValue().intValue()); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f }), + topDocs.scoreDocs[1].score, + 0.001f + ); + assertEquals(7, topDocs.scoreDocs[2].doc); + assertEquals(2, reader.storedFields().document(topDocs.scoreDocs[2].doc).getField(TEST_ID_FIELD).numericValue().intValue()); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 2.0f }), + topDocs.scoreDocs[2].score, + 0.001f + ); + log.info("successfully completed search tests"); + } + } + log.info("successfully closed directory"); + } + + /** + * Test to verify that the JVector codec is able to successfully search for the nearest neighbours + * in the index. + * Single field is used to store the vectors. + * Documents are stored in a multiple segments. + * Multiple commits without refreshing the index. + * No merge. + */ + @Test + public void testJVectorKnnIndex_multipleSegments() throws IOException { + int k = 3; // The number of nearest neighbours to gather + int totalNumberOfDocs = 10; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec()); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(false)); + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = new float[] { 0.0f, 0.0f }; + for (int i = 1; i < totalNumberOfDocs + 1; i++) { + final float[] source = new float[] { 0.0f, 1.0f / i }; + final Document doc = new Document(); + doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); + w.addDocument(doc); + w.commit(); // this creates a new segment + } + log.info("Done writing all files to the file system"); + + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have 10 segments, each with a single document"); + Assert.assertEquals(10, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = new KnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + assertEquals(9, topDocs.scoreDocs[0].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 10.0f }), + topDocs.scoreDocs[0].score, + 0.001f + ); + assertEquals(8, topDocs.scoreDocs[1].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 9.0f }), + topDocs.scoreDocs[1].score, + 0.001f + ); + assertEquals(7, topDocs.scoreDocs[2].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 8.0f }), + topDocs.scoreDocs[2].score, + 0.001f + ); + log.info("successfully completed search 
tests"); + } + } + } + + /** + * Test to verify that the JVector codec is able to successfully search for the nearest neighbours + * in the index. + * Single field is used to store the vectors. + * Documents are stored in a multiple segments. + * Multiple commits without refreshing the index. + * Merge is enabled. + */ + @Test + public void testJVectorKnnIndex_mergeEnabled() throws IOException { + int k = 3; // The number of nearest neighbours to gather + int totalNumberOfDocs = 10; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec()); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = new float[] { 0.0f, 0.0f }; + for (int i = 1; i < totalNumberOfDocs + 1; i++) { + final float[] source = new float[] { 0.0f, 1.0f * i }; + final Document doc = new Document(); + doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); + doc.add(new StringField("my_doc_id", Integer.toString(i, 10), Field.Store.YES)); + w.addDocument(doc); + w.commit(); // this creates a new segment without triggering a merge + } + log.info("Done writing all files to the file system"); + + w.forceMerge(1); // this merges all segments into a single segment + log.info("Done merging all segments"); + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have 1 segment with 10 documents"); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + Document doc = reader.storedFields().document(topDocs.scoreDocs[0].doc); + assertEquals("1", doc.get("my_doc_id")); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f }), + topDocs.scoreDocs[0].score, + 0.001f + ); + doc = reader.storedFields().document(topDocs.scoreDocs[1].doc); + assertEquals("2", doc.get("my_doc_id")); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 2.0f }), + topDocs.scoreDocs[1].score, + 0.001f + ); + doc = reader.storedFields().document(topDocs.scoreDocs[2].doc); + assertEquals("3", doc.get("my_doc_id")); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 3.0f }), + topDocs.scoreDocs[2].score, + 0.001f + ); + log.info("successfully completed search tests"); + } + } + } + + /** + * Test to verify that the jVector codec is able to successfully search for the nearest neighbors + * in the index. + * Single field is used to store the vectors. + * Documents are stored in potentially multiple segments. + * Multiple commits. + * Multiple merges. 
+ */ + @Test + public void multipleMerges() throws IOException { + int k = 3; // The number of nearest neighbours to gather + int totalNumberOfDocs = 10; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec()); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); + final Path indexPath = createTempDir(); + final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = new float[] { 0.0f, 0.0f }; + for (int i = 1; i < totalNumberOfDocs + 1; i++) { + final float[] source = new float[] { 0.0f, 1.0f * i }; + final Document doc = new Document(); + doc.add(new KnnFloatVectorField("test_field", source, vectorSimilarityFunction)); + doc.add(new StringField("my_doc_id", Integer.toString(i, 10), Field.Store.YES)); + w.addDocument(doc); + w.commit(); // this creates a new segment without triggering a merge + w.forceMerge(1); // this merges all segments into a single segment + } + log.info("Done writing all files to the file system"); + + w.forceMerge(1); // this merges all segments into a single segment + log.info("Done merging all segments"); + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have 1 segment with 10 documents"); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + Document doc = reader.storedFields().document(topDocs.scoreDocs[0].doc); + assertEquals("1", doc.get("my_doc_id")); + Assert.assertEquals( + vectorSimilarityFunction.compare(target, new float[] { 0.0f, 1.0f }), + topDocs.scoreDocs[0].score, + 0.001f + ); + doc = reader.storedFields().document(topDocs.scoreDocs[1].doc); + assertEquals("2", doc.get("my_doc_id")); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 2.0f }), + topDocs.scoreDocs[1].score, + 0.001f + ); + doc = reader.storedFields().document(topDocs.scoreDocs[2].doc); + assertEquals("3", doc.get("my_doc_id")); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 3.0f }), + topDocs.scoreDocs[2].score, + 0.001f + ); + log.info("successfully completed search tests"); + } + } + } + + /** + * Test to verify that the jVector codec is able to successfully search for the nearest neighbours + * in the index. + * A Single field is used to store the vectors. + * Documents are stored in potentially multiple segments. + * Multiple commits. + * Multiple merges. 
+ * Large batches + * Use a compound file + */ + @Test + public void testJVectorKnnIndex_multiple_merges_large_batches_no_quantization() throws IOException { + int segmentSize = DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; + int totalNumberOfDocs = segmentSize * 4; + int k = 3; // The number of nearest neighbors to gather + + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(true); + indexWriterConfig.setCodec(getCodec(Integer.MAX_VALUE)); // effectively without quantization + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(true)); + indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); + // We set the below parameters to make sure no permature flush will occur, this way we can have a single segment, and we can force + // test the quantization case + indexWriterConfig.setMaxBufferedDocs(10000); // force flush every 10000 docs, this way we make sure that we only have a single + // segment for a totalNumberOfDocs < 1000 + indexWriterConfig.setRAMPerThreadHardLimitMB(1000); // 1000MB per thread, this way we make sure that no premature flush will occur + + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = new float[] { 0.0f, 0.0f }; + for (int i = 1; i < totalNumberOfDocs + 1; i++) { + final float[] source = new float[] { 0.0f, 1.0f / i }; + final Document doc = new Document(); + doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); + doc.add(new StringField("my_doc_id", Integer.toString(i, 10), Field.Store.YES)); + w.addDocument(doc); + if (i % segmentSize == 0) { + w.commit(); // this creates a new segment without triggering a merge + } + } + log.info("Done writing all files to the file system"); + + w.forceMerge(1); // this merges all segments into a single segment + log.info("Done merging all segments"); + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have 1 segment with {} documents", totalNumberOfDocs); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + + float expectedMinScoreInTopK = VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, k }); + final float recall = calculateRecall(topDocs, expectedMinScoreInTopK); + Assert.assertEquals(1.0f, recall, 0.01f); + + log.info("successfully completed search tests"); + } + } + } + + /** + * Similar to testJVectorKnnIndex_multiple_merges_large_batches_no_quantization but with random vectors + * It's important to add more randomness to the vectors to make sure the graph is not linear + * @throws IOException if an I/O error occurs + */ + @Test + public void testJVectorKnnIndex_multiple_merges_large_batches_no_quantization_with_random_vectors() throws IOException { + int segmentSize = 200; + int totalNumberOfDocs = segmentSize * 4; + int k = 3; // The number of nearest neighbors to gather + final int dimension = 2; + final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; + 
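+        // Generate a random query vector and dataset up front, and compute the brute-force ground truth so recall can be verified after indexing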
final float[] target = TestUtils.generateRandomVectors(1, dimension)[0];
+        final float[][] source = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension);
+        final Set<Integer> groundTruthVectorsIds = calculateGroundTruthVectorsIds(target, source, k, vectorSimilarityFunction);
+
+        IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig();
+        indexWriterConfig.setUseCompoundFile(true);
+        indexWriterConfig.setCodec(getCodec(Integer.MAX_VALUE)); // effectively without quantization
+        indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(true));
+        indexWriterConfig.setMergeScheduler(new SerialMergeScheduler());
+        // We set the below parameters to make sure no premature flush will occur; this way we can have a single segment, and we can
+        // force-test the quantization case
+        indexWriterConfig.setMaxBufferedDocs(10000); // force flush every 10000 docs, this way we make sure that we only have a single
+                                                     // segment for a totalNumberOfDocs < 10000
+        indexWriterConfig.setRAMPerThreadHardLimitMB(1000); // 1000MB per thread, this way we make sure that no premature flush will occur
+
+        final Path indexPath = createTempDir();
+        log.info("Index path: {}", indexPath);
+        try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
+            for (int i = 0; i < source.length; i++) {
+                final Document doc = new Document();
+                doc.add(new KnnFloatVectorField(TEST_FIELD, source[i], VectorSimilarityFunction.EUCLIDEAN));
+                doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES));
+                w.addDocument(doc);
+                if (i % segmentSize == 0) {
+                    w.commit(); // this creates a new segment without triggering a merge
+                }
+            }
+            log.info("Done writing all files to the file system");
+
+            w.forceMerge(1); // this merges all segments into a single segment
+            log.info("Done merging all segments");
+            try (IndexReader reader = DirectoryReader.open(w)) {
+                log.info("We should now have a single segment with {} documents", totalNumberOfDocs);
+                Assert.assertEquals(1, reader.getContext().leaves().size());
+                Assert.assertEquals(totalNumberOfDocs, reader.numDocs());
+
+                final Query filterQuery = new MatchAllDocsQuery();
+                final IndexSearcher searcher = newSearcher(reader);
+                KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery(TEST_FIELD, target, k, filterQuery);
+                TopDocs topDocs = searcher.search(knnFloatVectorQuery, k);
+                assertEquals(k, topDocs.totalHits.value());
+                final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k);
+                Assert.assertEquals(1.0f, recall, 0.05f);
+                log.info("successfully completed search tests");
+            }
+        }
+    }
+
+    /**
+     * Tests the functionality and integrity of a Lucene k-NN index under multiple merge cycles and verifies
+     * the proper ordering of vectors and document identifiers.
+     *
+     * The method performs the following validation steps:
+     * 1. Indexes a predefined number of documents into a Lucene index, creating many small segments. Each document
+     *    includes a k-NN float vector field plus a stored field that records its original insertion order.
+     * 2. Executes several merge operations on the index (partial and full merges) to validate that the merging
+     *    process maintains correctness and consistency.
+     * 3. Validates the following invariants post-merge:
+     *    (a) Verifies that the index is merged into a single segment.
+     *    (b) Confirms the integrity of vector values by iterating through the merged segment and checking the
+     *        relationship between vector components and document identifiers.
+     *    (c) Performs k-NN searches with various cases:
+     *        - Single-threaded searches using random query vectors to ensure correct results.
+     *        - Multi-threaded concurrent searches to confirm robustness and verify the index operates correctly
+     *          under concurrent access without exhausting file handles or encountering other issues.
+     *
+     * Assertions are used throughout to ensure the state of the index matches the expected behavior, validate merge
+     * results, and confirm the accuracy of search operations. The test also logs the number of successful k-NN queries
+     * during the concurrent search phase.
+     *
+     * @throws IOException if an I/O error occurs during index operations.
+     * @throws InterruptedException if the concurrent search phase is interrupted.
+     */
+    @Test
+    public void testLuceneKnnIndex_multipleMerges_with_ordering_check() throws IOException, InterruptedException {
+        final int numDocs = 10000;
+        final String floatVectorField = "vec";
+        final String expectedDocIdField = "expectedDocId";
+        final Path indexPath = createTempDir();
+        final float[][] sourceVectors = TestUtils.generateRandomVectors(numDocs, 2);
+        final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN;
+
+        try (Directory dir = newFSDirectory(indexPath)) {
+            IndexWriterConfig cfg = newIndexWriterConfig();
+            cfg.setCodec(getCodec());
+            cfg.setUseCompoundFile(false);
+            cfg.setMergePolicy(new ForceMergesOnlyMergePolicy(false));
+            cfg.setMergeScheduler(new SerialMergeScheduler());
+
+            try (IndexWriter w = new IndexWriter(dir, cfg)) {
+                /* ---------- 1. index documents, create many tiny segments ---------- */
+                for (int i = 0; i < numDocs; i++) {
+                    Document doc = new Document();
+                    // random vector for this doc; the stored field below records the expected (pre-merge) document order
+                    doc.add(new KnnFloatVectorField(floatVectorField, sourceVectors[i], vectorSimilarityFunction));
+                    doc.add(new StoredField(expectedDocIdField, i));
+                    w.addDocument(doc);
+                }
+                w.commit();
+
+                /* ---------- 2. run several merge cycles ---------- */
+                w.forceMerge(5); // partial merge
+                w.forceMerge(3); // another partial merge
+                w.forceMerge(1); // final full merge
+            }
+
+            /* ---------- 3.
open reader and assert the invariant ---------- */ + try (DirectoryReader reader = DirectoryReader.open(dir)) { + assertEquals("we merged down to exactly one segment", 1, reader.leaves().size()); + + // (a) iterate through vectors directly + for (LeafReaderContext context : reader.leaves()) { + FloatVectorValues vectorValues = context.reader().getFloatVectorValues("vec"); + final var docIdSetIterator = vectorValues.iterator(); // iterator for all the vectors with values + int docId = -1; + while ((docId = docIdSetIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + final int luceneDocId = context.docBase + docId; + final int globalDocId = reader.storedFields() + .document(luceneDocId) + .getField(expectedDocIdField) + .storedValue() + .getIntValue(); + float[] vectorValue = vectorValues.vectorValue(docIdSetIterator.index()); + float[] expectedVectorValue = sourceVectors[globalDocId]; + // if the vectors do not match, also look which source vector should be the right result + if (!Arrays.equals(expectedVectorValue, vectorValue)) { + for (int i = 0; i < sourceVectors.length; i++) { + if (Arrays.equals(sourceVectors[i], vectorValue)) { + log.error( + "found vector with global id: {}, in docId: {}, however the actual position of the vector in source is: {}", + globalDocId, + luceneDocId, + i + ); + } + } + } + Assert.assertArrayEquals( + "vector with global id " + + globalDocId + + " in source doesn't match vector value in lucene docID " + + luceneDocId + + " on the index", + expectedVectorValue, + vectorValue, + 0.0f + ); + } + } + + // (b) search with the same vector and confirm we are not exhausting the file handles with each search + IndexSearcher searcher = newSearcher(reader); + LeafReaderContext context = reader.leaves().get(0); // we only have one leaf at this point so we can use it to obtain the + // vector values + final int baseDocId = context.docBase; + final FloatVectorValues vectorValues = context.reader().getFloatVectorValues("vec"); + final int k = 1; + for (int i = 0; i < reader.maxDoc(); i++) { + float[] query = TestUtils.generateRandomVectors(1, 2)[0]; + TopDocs td = searcher.search(getJVectorKnnFloatVectorQuery("vec", query, k, new MatchAllDocsQuery()), k); + assertEquals(k, td.scoreDocs.length); + + compareSearchResults(td, sourceVectors, reader, expectedDocIdField, baseDocId, vectorValues); + } + + // (c) search with the same vector and this time add concurrency to make sure we are still not exhausting the file handles + int numThreads = 10; // Number of concurrent search threads + int queriesPerThread = 100; // Number of searches per thread + ExecutorService executor = Executors.newFixedThreadPool(numThreads); + CountDownLatch latch = new CountDownLatch(numThreads); + AtomicBoolean failureDetected = new AtomicBoolean(false); + AtomicInteger totalQueries = new AtomicInteger(0); + + try { + for (int t = 0; t < numThreads; t++) { + executor.submit(() -> { + int i = 0; + + try { + for (i = 0; i < queriesPerThread && !failureDetected.get(); i++) { + float[] query = TestUtils.generateRandomVectors(1, 2)[0]; + try { + TopDocs td = searcher.search(new KnnFloatVectorQuery("vec", query, k), k); + assertEquals("Search should return correct number of results", k, td.scoreDocs.length); + compareSearchResults(td, sourceVectors, reader, expectedDocIdField, baseDocId, vectorValues); + totalQueries.incrementAndGet(); + } catch (Throwable e) { + failureDetected.compareAndSet(false, true); + log.error("Exception encountered", e); + fail("Exception during concurrent search: " + 
e.getMessage()); + } + } + } finally { + latch.countDown(); + log.warn("Ran {} queries", i); + } + }); + } + + // Wait for all threads to complete or for a failure + boolean completed = latch.await(30, TimeUnit.SECONDS); + assertTrue("Test timed out while waiting for concurrent searches", completed); + assertFalse("Test encountered failures during concurrent searches", failureDetected.get()); + assertEquals("Incorrect number of queries executed", numThreads * queriesPerThread, totalQueries.get()); + + // Log the number of successful queries + log.info("Successfully completed {} concurrent kNN search queries!", totalQueries.get()); + + } finally { + executor.shutdownNow(); + } + } + } + + } + + private void compareSearchResults( + TopDocs topDocs, + float[][] sourceVectors, + DirectoryReader reader, + String expectedDocIdField, + int baseDocId, + FloatVectorValues vectorValues + ) throws IOException { + // Get the ords matching the lucene doc ids so that we can later find their values in the {@link vectorValues} + final Map docToOrdMap = new HashMap<>(); // docToOrd map + final var docIdSetIterator = vectorValues.iterator(); + while (docIdSetIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + docToOrdMap.put(docIdSetIterator.docID() + baseDocId, docIdSetIterator.index()); + } + + for (int resultIdx = 0; resultIdx < topDocs.scoreDocs.length; resultIdx++) { + final int localDocId = topDocs.scoreDocs[resultIdx].doc; + final int globalDocId = reader.storedFields().document(localDocId).getField(expectedDocIdField).storedValue().getIntValue(); + + // Access to float values is not thread safe + final float[] vectorValue; + synchronized (vectorValues) { + vectorValue = vectorValues.vectorValue(docToOrdMap.get(localDocId)); + } + float[] expectedVectorValue = sourceVectors[globalDocId]; + Assert.assertArrayEquals("vectors in source and index should match", expectedVectorValue, vectorValue, 0.0f); + } + } + + /** + * Test to verify that a document which has been deleted is no longer + * returned in a k-NN search. The index uses the JVector codec and is + * kept in multiple segments to ensure we also cover the case where the + * deleted document still physically resides in the segment as a dead + * (non-live) record. + */ + @Test + public void deletedDocs() throws IOException { + final int totalNumberOfDocs = 100; + final int batchSize = 10; + final int k = batchSize - 1; + final int docToDeleteInEachBatch = 5; + final Path indexPath = createTempDir(); + final IndexWriterConfig iwc = newIndexWriterConfig(); + // JVector codec requires compound files to be disabled at the moment + iwc.setUseCompoundFile(false); + iwc.setCodec(getCodec()); + iwc.setMergePolicy(new ForceMergesOnlyMergePolicy(false)); + + try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter writer = new IndexWriter(dir, iwc)) { + + /* + * 1. Index 100 docs, in batches of 10. Delete the 5th doc in each batch. + * will leave us with 10 segments, each with 9 live docs. 
+ */ + int batchNumber = 0; + for (int i = 1; i <= totalNumberOfDocs; i++) { + Document doc = new Document(); + final float[] vector = { 0.0f, 1.0f * (i + batchNumber) }; + doc.add(new StringField("docId", Integer.toString(i + 1), Field.Store.YES)); + doc.add(new KnnFloatVectorField("test_field", vector, VectorSimilarityFunction.EUCLIDEAN)); + writer.addDocument(doc); + if (i % batchSize == 0) { + writer.flush(); + writer.deleteDocuments(new TermQuery(new Term("docId", Integer.toString(i - docToDeleteInEachBatch)))); + batchNumber++; + } + } + writer.commit(); + + /* ---------------------------------------- + * 2. Merge all segments into one + * ---------------------------------------- */ + writer.forceMerge(1); + + /* ---------------------------------------- + * 3. Search – the deleted doc must be gone + * ---------------------------------------- */ + try (IndexReader reader = DirectoryReader.open(writer)) { + assertEquals( + "All documents except the deleted ones should be live", + totalNumberOfDocs - (totalNumberOfDocs / batchSize), + reader.numDocs() + ); + // For each batch we will verify that the deleted document doesn't come up in search and only it's neighbours are returned + + for (int i = 0; i < totalNumberOfDocs; i += batchSize) { + final float[] target = { 0.0f, 1.0f * (i + docToDeleteInEachBatch) }; + final IndexSearcher searcher = newSearcher(reader); + final KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery( + "test_field", + target, + k, + new MatchAllDocsQuery() + ); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + for (int j = 0; j < k; j++) { + Document doc = reader.storedFields().document(topDocs.scoreDocs[j].doc); + int docId = Integer.parseInt(doc.get("docId")); + assertNotEquals("Deleted doc should not be returned in search results", i + docToDeleteInEachBatch, docId); + } + } + } + } + } + + /** + * Test to verify that the Lucene codec is able to successfully search for the nearest neighbours + * in the index. + * Single field is used to store the vectors. + * Documents are stored in potentially multiple segments. + * Multiple commits. + * Multiple merges. + * Merge is enabled. + * compound file is enabled. 
+ */ + @Test + public void testLuceneKnnIndex_mergeEnabled_withCompoundFile() throws IOException { + int k = 3; // The number of nearest neighbors to gather + int totalNumberOfDocs = 10; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(true); + indexWriterConfig.setCodec(getCodec()); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(true)); + indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = new float[] { 0.0f, 0.0f }; + for (int i = 1; i < totalNumberOfDocs + 1; i++) { + final float[] source = new float[] { 0.0f, 1.0f / i }; + final Document doc = new Document(); + doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); + w.addDocument(doc); + w.flush(); // this creates a new segment without triggering a merge + } + log.info("Done writing all files to the file system"); + + w.forceMerge(1); // this merges all segments into a single segment + log.info("Done merging all segments"); + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have 1 segment with 10 documents"); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + assertEquals(9, topDocs.scoreDocs[0].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 10.0f }), + topDocs.scoreDocs[0].score, + 0.01f + ); + assertEquals(8, topDocs.scoreDocs[1].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 9.0f }), + topDocs.scoreDocs[1].score, + 0.01f + ); + assertEquals(7, topDocs.scoreDocs[2].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 8.0f }), + topDocs.scoreDocs[2].score, + 0.01f + ); + log.info("successfully completed search tests"); + } + } + } + + /** + * Test to verify that the Lucene codec is able to successfully search for the nearest neighbours + * in the index. + * Single field is used to store the vectors. + * Documents are stored in potentially multiple segments. + * Multiple commits. + * Multiple merges. + * Merge is enabled. + * compound file is enabled. + * cosine similarity is used. 
+ */ + @Test + public void testLuceneKnnIndex_mergeEnabled_withCompoundFile_cosine() throws IOException { + int k = 3; // The number of nearest neighbours to gather + int totalNumberOfDocs = 10; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(true); + indexWriterConfig.setCodec(getCodec()); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(true)); + indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = new float[] { 1.0f, 1.0f }; + for (int i = 1; i < totalNumberOfDocs + 1; i++) { + final float[] source = new float[] { 1.0f + i, 2.0f * i }; + final Document doc = new Document(); + doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.COSINE)); + w.addDocument(doc); + w.flush(); // this creates a new segment without triggering a merge + } + log.info("Done writing all files to the file system"); + + w.forceMerge(1); // this merges all segments into a single segment + log.info("Done merging all segments"); + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have 1 segment with 10 documents"); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + assertEquals(0, topDocs.scoreDocs[0].doc); + Assert.assertEquals( + VectorSimilarityFunction.COSINE.compare(target, new float[] { 2.0f, 2.0f }), + topDocs.scoreDocs[0].score, + 0.001f + ); + assertEquals(1, topDocs.scoreDocs[1].doc); + Assert.assertEquals( + VectorSimilarityFunction.COSINE.compare(target, new float[] { 3.0f, 4.0f }), + topDocs.scoreDocs[1].score, + 0.001f + ); + assertEquals(2, topDocs.scoreDocs[2].doc); + Assert.assertEquals( + VectorSimilarityFunction.COSINE.compare(target, new float[] { 4.0f, 6.0f }), + topDocs.scoreDocs[2].score, + 0.001f + ); + log.info("successfully completed search tests"); + } + } + } + + /** + * Test to verify that the JVector codec is providing proper error if used with byte vector + * TODO: Create Binary Quantization support for JVector codec + */ + @Test + public void testJVectorKnnIndex_simpleCase_withBinaryVector() throws IOException { + int k = 3; // The number of nearest neighbours to gather + int totalNumberOfDocs = 10; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + // TODO: re-enable this after fixing the compound file augmentation for JVector + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec()); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (Directory dir = newFSDirectory(indexPath); RandomIndexWriter w = new RandomIndexWriter(random(), dir, indexWriterConfig)) { + final byte[] source = new byte[] { (byte) 0, (byte) 0 }; + final Document doc = new Document(); + doc.add(new KnnByteVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); + 
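+            // The jVector codec does not support byte vectors yet (see the TODO above), so adding this document is expected to throw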
Assert.assertThrows(UnsupportedOperationException.class, () -> w.addDocument(doc)); + } + } + + /** + * Test to verify that the JVector codec is able to successfully search for the nearest neighbours + * in the index with a filter applied. + */ + @Test + public void testJVectorKnnIndex_withFilter() throws IOException { + int k = 3; // The number of nearest neighbours to gather + int totalNumberOfDocs = 10; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec()); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (Directory dir = newFSDirectory(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = new float[] { 0.0f, 0.0f }; + for (int i = 1; i < totalNumberOfDocs + 1; i++) { + final float[] source = new float[] { 0.0f, 1.0f / i }; + final Document doc = new Document(); + doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); + doc.add(new StringField("filter_field", i % 2 == 0 ? "even" : "odd", Field.Store.YES)); + w.addDocument(doc); + } + log.info("Flushing docs to make them discoverable on the file system"); + w.commit(); + + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("Applying filter to the KNN search"); + final Query filterQuery = new TermQuery(new Term("filter_field", "even")); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + + log.info("Validating filtered KNN results"); + assertEquals(k, topDocs.totalHits.value()); + assertEquals(9, topDocs.scoreDocs[0].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 10.0f }), + topDocs.scoreDocs[0].score, + 0.001f + ); + assertEquals(7, topDocs.scoreDocs[1].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 8.0f }), + topDocs.scoreDocs[1].score, + 0.001f + ); + assertEquals(5, topDocs.scoreDocs[2].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 6.0f }), + topDocs.scoreDocs[2].score, + 0.001f + ); + log.info("successfully completed filtered search tests"); + } + } + } + + /** + * Test the simple case of quantization where we have the perfect batch single batch size with no merges or too small batch sizes + */ + @Test + public void testJVectorKnnIndex_simpleCase_withQuantization() throws IOException { + int k = 50; // The number of nearest neighbours to gather + int dimension = 16; + int totalNumberOfDocs = DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; + final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; + + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec(DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION)); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + // We set the below parameters to make sure no permature flush will occur, this way we can have a single segment, and we can force + // test the quantization case + indexWriterConfig.setMaxBufferedDocs(10000); // force flush every 10000 docs, this way we make 
sure that we only have a single + // segment for a totalNumberOfDocs < 1000 + indexWriterConfig.setRAMPerThreadHardLimitMB(1000); // 1000MB per thread, this way we make sure that no premature flush will occur + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = generateZerosVectorWithLastValue(dimension, 0); + final float[][] vectors = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); + final Set groundTruthVectorsIds = calculateGroundTruthVectorsIds(target, vectors, k, vectorSimilarityFunction); + for (int i = 0; i < vectors.length; i++) { + final Document doc = new Document(); + doc.add(new KnnFloatVectorField(TEST_FIELD, vectors[i], vectorSimilarityFunction)); + doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); + w.addDocument(doc); + } + log.info("Flushing docs to make them discoverable on the file system"); + w.commit(); + + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have a single segment with {} documents", totalNumberOfDocs); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery(TEST_FIELD, target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); + Assert.assertEquals(1.0f, recall, 0.05f); + log.info("successfully completed search tests"); + } + } + } + + /** + * Test recall with different types of rerank parameters + */ + @Test + public void testJVectorKnnIndex_simpleCase_withQuantization_rerank() throws IOException { + int k = 1; // The number of nearest neighbours to gather + int dimension = 16; + int totalNumberOfDocs = DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec(DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION)); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + // We set the below parameters to make sure no permature flush will occur, this way we can have a single segment, and we can force + // test the quantization case + indexWriterConfig.setMaxBufferedDocs(10000); // force flush every 10000 docs, this way we make sure that we only have a single + // segment for a totalNumberOfDocs < 1000 + indexWriterConfig.setRAMPerThreadHardLimitMB(1000); // 1000MB per thread, this way we make sure that no premature flush will occur + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = generateZerosVectorWithLastValue(dimension, 0); + for (int i = 1; i < totalNumberOfDocs + 1; i++) { + final float[] source = generateZerosVectorWithLastValue(dimension, i); + final Document doc = new Document(); + doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); + w.addDocument(doc); + } + log.info("Flushing docs to make them discoverable on the file system"); + w.commit(); + + try (IndexReader 
reader = DirectoryReader.open(w)) { + log.info("We should now have a single segment with {} documents", totalNumberOfDocs); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + float expectedMinScoreInTopK = VectorSimilarityFunction.EUCLIDEAN.compare( + target, + new float[] { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, k } + ); + + // Query with essentially no reranking and expect recall to be very low + KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery, 1); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + + final float recallWithLowOverqueryFactor = calculateRecall(topDocs, expectedMinScoreInTopK); + + // Query with reranking and expect recall to be high + knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery, 5); + topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + float recallWithHighOverqueryFactor = calculateRecall(topDocs, expectedMinScoreInTopK); + Assert.assertTrue(recallWithLowOverqueryFactor <= recallWithHighOverqueryFactor); + + log.info("successfully completed search tests"); + } + } + } + + /** + * Test the simple case of quantization where we have the perfect batch single batch size each time with a merge of + * multiple segments + */ + @Test + public void testJVectorKnnIndex_happyCase_withQuantization_multipleSegments() throws IOException { + final int dimension = 16; + final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; + final int k = 50; // The number of nearest neighbours to gather, we set a high number here to avoid an inaccurate result and + // jittery tests + final int perfectBatchSize = DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; // MINIMUM_BATCH_SIZE_FOR_QUANTIZATION is the minimal + // batch size that will trigger a quantization without + // breaking it, generally speaking the batch size can't be + // lower than the number of clusters + final int totalNumberOfDocs = perfectBatchSize * 2; + + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec(DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION)); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + // We set the below parameters to make sure no permature flush will occur, this way we can have a single segment, and we can force + // test the quantization case + indexWriterConfig.setMaxBufferedDocs(10000); // force flush every 10000 docs, this way we make sure that we only have a single + // segment for a totalNumberOfDocs < 1000 + indexWriterConfig.setRAMPerThreadHardLimitMB(1000); // 1000MB per thread, this way we make sure that no premature flush will occur + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = generateZerosVectorWithLastValue(dimension, 0); + final float[][] vectors = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); + final Set groundTruthVectorsIds = calculateGroundTruthVectorsIds(target, vectors, k, vectorSimilarityFunction); + + for 
(int i = 0; i < vectors.length; i++) { + final Document doc = new Document(); + doc.add(new KnnFloatVectorField(TEST_FIELD, vectors[i], vectorSimilarityFunction)); + doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); + w.addDocument(doc); + if (i % perfectBatchSize == 0) { + w.commit(); + } + } + log.info("Flushing docs to make them discoverable on the file system"); + w.forceMerge(1); + + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have a single segment with {} documents", totalNumberOfDocs); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); + Assert.assertEquals(1.0f, recall, 0.05f); + log.info("successfully completed search tests"); + } + } + } + + /** + * Test the non-ideal case where batch sizes are not perfect and are lower than the number of recommended clusters in the index + * The expected behavior is for the quantization to only kick in when we have a merge or batch size that is bigger than the minimal required batch size + */ + @Test + public void testJVectorKnnIndex_mixedBatchSizes_withQuantization_multipleMerges() throws IOException { + final int k = 50; // The number of nearest neighbours to gather, we set a high number here to avoid an inaccurate result and + // jittery tests + final int dimension = 16; + final int notIdealBatchSize = DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION / 3; // Batch size that is not ideal for quantization and + // shouldn't trigger it + final int totalNumberOfDocs = notIdealBatchSize * 3; // 3 batches of documents each will result in quantization only when the merge + // is triggered, and we have a batch size of {@link + // MINIMUM_BATCH_SIZE_FOR_QUANTIZATION} as a result of merging all the smaller + // batches + final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec(DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION)); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + // We set the below parameters to make sure no permature flush will occur, this way we can have a single segment, and we can force + // test the quantization case + indexWriterConfig.setMaxBufferedDocs(10000); // force flush every 10000 docs, this way we make sure that we only have a single + // segment for a totalNumberOfDocs < 1000 + indexWriterConfig.setRAMPerThreadHardLimitMB(1000); // 1000MB per thread, this way we make sure that no premature flush will occur + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = generateZerosVectorWithLastValue(dimension, 0); + final float[][] vectors = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); + final Set groundTruthVectorsIds = calculateGroundTruthVectorsIds(target, vectors, k, vectorSimilarityFunction); + for (int i = 0; i < 
totalNumberOfDocs; i++) {
+                final float[] source = vectors[i];
+                final Document doc = new Document();
+                doc.add(new KnnFloatVectorField("test_field", source, vectorSimilarityFunction));
+                doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES));
+                w.addDocument(doc);
+                if (i % notIdealBatchSize == 0) {
+                    w.commit();
+                }
+            }
+            log.info("Flushing docs to make them discoverable on the file system");
+            w.forceMerge(1);
+
+            try (IndexReader reader = DirectoryReader.open(w)) {
+                log.info("We should now have a single segment with {} documents", totalNumberOfDocs);
+                Assert.assertEquals(1, reader.getContext().leaves().size());
+                Assert.assertEquals(totalNumberOfDocs, reader.numDocs());
+
+                final Query filterQuery = new MatchAllDocsQuery();
+                final IndexSearcher searcher = newSearcher(reader);
+                KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery);
+                TopDocs topDocs = searcher.search(knnFloatVectorQuery, k);
+                assertEquals(k, topDocs.totalHits.value());
+                final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k);
+                Assert.assertEquals(1.0f, recall, 0.05f);
+                log.info("successfully completed search tests");
+            }
+        }
+    }
+
+    /**
+     * Test the non-ideal case where batch sizes are not perfect and are lower than the number of recommended clusters in the index.
+     * The expected behavior is for the quantization to only kick in when we have a merge or a batch size that is bigger than the minimal required batch size.
+     * This variant also adds the compound file to the mix.
+     */
+    @Test
+    public void testJVectorKnnIndex_mixedBatchSizes_withQuantization_multipleMerges_withCompoundFile() throws IOException {
+        final int k = 50; // The number of nearest neighbours to gather, we set a high number here to avoid an inaccurate result and
+                          // jittery tests
+        final int dimension = 16;
+        final int notIdealBatchSize = DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION / 3; // Batch size that is not ideal for quantization and
+                                                                                       // shouldn't trigger it
+        final int totalNumberOfDocs = notIdealBatchSize * 10; // 10 batches of documents; quantization will only kick in when the merge
+                                                              // is triggered and the combined size reaches at least
+                                                              // {@link MINIMUM_BATCH_SIZE_FOR_QUANTIZATION} as a result of merging all the smaller batches
+        final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN;
+
+        boolean useCompoundFile = true;
+        IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig();
+        indexWriterConfig.setUseCompoundFile(useCompoundFile);
+        indexWriterConfig.setCodec(getCodec(DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION));
+        indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(useCompoundFile));
+        // We set the below parameters to make sure no premature flush will occur, this way we can have a single segment, and we can force
+        // test the quantization case
+        indexWriterConfig.setMaxBufferedDocs(10000); // force flush every 10000 docs, this way we make sure that we only have a single
+                                                     // segment for a totalNumberOfDocs < 10000
+        indexWriterConfig.setRAMPerThreadHardLimitMB(1000); // 1000MB per thread, this way we make sure that no premature flush will occur
+        final Path indexPath = createTempDir();
+        log.info("Index path: {}", indexPath);
+        try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) {
+            final float[] target = generateZerosVectorWithLastValue(dimension, 0);
+            // We use random vectors because otherwise PQ would have correlated subspaces, which would result in a broken linear graph
+            final float[][] vectors = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension);
+            final Set<Integer> groundTruthVectorsIds = calculateGroundTruthVectorsIds(target, vectors, k, vectorSimilarityFunction);
+            for (int i = 0; i < totalNumberOfDocs; i++) {
+                final float[] source = vectors[i];
+                final Document doc = new Document();
+                doc.add(new KnnFloatVectorField(TEST_FIELD, source, vectorSimilarityFunction));
+                doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES));
+                w.addDocument(doc);
+                if (i % notIdealBatchSize == 0) {
+                    w.commit();
+                }
+            }
+            w.commit();
+            log.info("Flushing docs to make them discoverable on the file system");
+            w.forceMerge(1);
+
+            try (IndexReader reader = DirectoryReader.open(w)) {
+                log.info("We should now have a single segment with {} documents", totalNumberOfDocs);
+                Assert.assertEquals(1, reader.getContext().leaves().size());
+                Assert.assertEquals(totalNumberOfDocs, reader.numDocs());
+
+                final Query filterQuery = new MatchAllDocsQuery();
+                final IndexSearcher searcher = newSearcher(reader);
+                KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery, 1000);
+                TopDocs topDocs = searcher.search(knnFloatVectorQuery, k);
+                assertEquals(k, topDocs.totalHits.value());
+                final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k);
+                Assert.assertEquals("Expected to have recall of 1.0+/-0.05 but got " + recall, 1.0f, recall, 0.05f);
+                log.info("successfully completed search tests");
+            }
+        }
+
+        Assert.assertTrue("No quantization time recorded", KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.getCount() > 0);
+        Assert.assertTrue("No graph merge time recorded", KNNCounter.KNN_GRAPH_MERGE_TIME.getCount() > 0);
+    }
+
+    /**
+     * We will use multiple batches, each large enough to trigger quantization on its own, and later merge them in appending order
+     * to keep track of refinement.
+     * @throws IOException if an I/O error occurs
+     */
+    @Test
+    public void testJVectorKnnIndex_withQuantization_withCompoundFile_with_refinement() throws IOException {
+        final int k = 50; // The number of nearest neighbours to gather, we set a high number here to avoid an inaccurate result and
+                          // jittery tests
+        final int dimension = 16;
+        final int idealBatchSize = DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; // Batch size that is large enough to trigger
+                                                                                // quantization on its own
+        final int totalNumberOfDocs = idealBatchSize * 10; // 10 batches, each batch on its own will trigger quantization
+        final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN;
+
+        boolean useCompoundFile = true;
+        IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig();
+        indexWriterConfig.setUseCompoundFile(useCompoundFile);
+        indexWriterConfig.setCodec(getCodec(DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION));
+        indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(useCompoundFile));
+        // We set the below parameters to make sure no premature flush will occur, this way we can have a single segment, and we can force
+        // test the quantization case
+        indexWriterConfig.setMaxBufferedDocs(10000); // force flush every 10000 docs, this way we make sure that we only have a single
+                                                     // segment for a totalNumberOfDocs < 10000
+        indexWriterConfig.setRAMPerThreadHardLimitMB(1000); // 1000MB per thread, this way we make sure that no premature flush will occur
+        final Path indexPath = createTempDir();
+        log.info("Index path: {}", indexPath);
+        try (FSDirectory dir =
FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = generateZerosVectorWithLastValue(dimension, 0); + // We will use random vectors because otherwise PQ will have a correlated subspaces which will result in a broken linear graph + final float[][] vectors = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); + final Set groundTruthVectorsIds = calculateGroundTruthVectorsIds(target, vectors, k, vectorSimilarityFunction); + for (int i = 0; i < totalNumberOfDocs; i++) { + final float[] source = vectors[i]; + final Document doc = new Document(); + doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); + doc.add(new KnnFloatVectorField(TEST_FIELD, source, vectorSimilarityFunction)); + w.addDocument(doc); + if (i % idealBatchSize == 0) { + final long beforeTrainingTime = KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.getCount(); + w.commit(); + w.forceMerge(1); // force merge will trigger PQ refinement if other segments are present + final long afterTrainingTime = KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.getCount(); + Assert.assertTrue( + "Expected to have a training time of at least " + beforeTrainingTime + " but got " + afterTrainingTime, + afterTrainingTime >= beforeTrainingTime + ); + } + } + w.commit(); + log.info("Flushing docs to make them discoverable on the file system"); + w.forceMerge(1); + + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have a single segment with {} documents", totalNumberOfDocs); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery, 1000); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); + Assert.assertEquals("Expected to have recall of 1.0+/-0.05 but got " + recall, 1.0f, recall, 0.05f); + log.info("successfully completed search tests"); + } + } + + Assert.assertTrue("No graph merge time recorded", KNNCounter.KNN_GRAPH_MERGE_TIME.getCount() > 0); + } + + /** + * Calculate the recall for the top k documents + * For simplicity we assume that all documents have unique scores and therefore the minimum score in the top k documents is the kth document + * @param topDocs the top documents returned by the search + * @param minScoreInTopK the minimum score in the top k documents + * @return the recall of the top k documents + */ + private float calculateRecall(TopDocs topDocs, float minScoreInTopK) { + int totalRelevantDocs = 0; + for (int i = 0; i < topDocs.scoreDocs.length; i++) { + if (topDocs.scoreDocs[i].score >= minScoreInTopK) { + totalRelevantDocs++; + } + } + float recall = ((float) totalRelevantDocs) / ((float) topDocs.scoreDocs.length); + + if (recall == 0.0f) { + log.info( + "Recall is 0.0, this is probably not correct, here is some debug information\n topDocs: {}, minScoreInTopK: {}, totalRelevantDocs: {}", + topDocsToString(topDocs), + minScoreInTopK, + totalRelevantDocs + ); + } + return recall; + } + + // convert topDocs to a pretty printed string + private String topDocsToString(TopDocs topDocs) { + StringBuilder sb = new StringBuilder(); + sb.append("TopDocs: ["); + for (int i = 0; i < topDocs.scoreDocs.length; i++) { + 
sb.append(topDocs.scoreDocs[i].doc).append(" (").append(topDocs.scoreDocs[i].score).append("), "); + } + sb.append("]"); + return sb.toString(); + } + + private JVectorKnnFloatVectorQuery getJVectorKnnFloatVectorQuery(String fieldName, float[] target, int k, Query filterQuery) { + return getJVectorKnnFloatVectorQuery(fieldName, target, k, filterQuery, KNNConstants.DEFAULT_OVER_QUERY_FACTOR); + } + + private JVectorKnnFloatVectorQuery getJVectorKnnFloatVectorQuery( + String fieldName, + float[] target, + int k, + Query filterQuery, + int overQueryFactor + ) { + return new JVectorKnnFloatVectorQuery( + fieldName, + target, + k, + filterQuery, + overQueryFactor, + KNNConstants.DEFAULT_QUERY_SIMILARITY_THRESHOLD.floatValue(), + KNNConstants.DEFAULT_QUERY_RERANK_FLOOR.floatValue(), + KNNConstants.DEFAULT_QUERY_USE_PRUNING + ); + } + + private static float[][] getMonotonicallyIncreasingVectors(int numVectors, int vectorDimension) { + float[][] vectors = new float[numVectors][vectorDimension]; + for (int i = 0; i < numVectors; i++) { + vectors[i] = generateZerosVectorWithLastValue(vectorDimension, i); + } + + return vectors; + } + + private static float[] generateZerosVectorWithLastValue(int vectorDimension, int lastValue) { + float[] vector = new float[vectorDimension]; + for (int i = 0; i < vectorDimension - 1; i++) { + vector[i] = 0; + } + vector[vectorDimension - 1] = lastValue; + return vector; + } + + private static float calculateRecall(IndexReader reader, Set groundTruthVectorsIds, TopDocs topDocs, int k) + throws IOException { + final ScoreDoc[] scoreDocs = topDocs.scoreDocs; + Assert.assertEquals(groundTruthVectorsIds.size(), scoreDocs.length); + int totalRelevantDocs = 0; + for (ScoreDoc scoreDoc : scoreDocs) { + final int id = reader.storedFields().document(scoreDoc.doc).getField(TEST_ID_FIELD).storedValue().getIntValue(); + if (groundTruthVectorsIds.contains(id)) { + totalRelevantDocs++; + } + } + return ((float) totalRelevantDocs) / ((float) k); + } + + /** + * Find the IDs of the ground truth vectors in the dataset + * @param query query vector + * @param dataset dataset of all the vectors with their ordinal position in the array as their ID + * @param k the number of expected results + * @return the IDs of the ground truth vectors in the dataset + */ + private static Set calculateGroundTruthVectorsIds( + float[] query, + final float[][] dataset, + int k, + VectorSimilarityFunction vectorSimilarityFunction + ) { + final Set groundTruthVectorsIds = new HashSet<>(); + final PriorityQueue priorityQueue = new PriorityQueue<>(k, (o1, o2) -> Float.compare(o1.score, o2.score)); + for (int i = 0; i < dataset.length; i++) { + ScoreDoc scoreDoc = new ScoreDoc(i, vectorSimilarityFunction.compare(query, dataset[i])); + if (priorityQueue.size() >= k) { + final ScoreDoc top = priorityQueue.poll(); + if (top.score < scoreDoc.score) { + priorityQueue.add(scoreDoc); + } else { + priorityQueue.add(top); + } + } else { + priorityQueue.add(scoreDoc); + } + } + while (!priorityQueue.isEmpty()) { + groundTruthVectorsIds.add(priorityQueue.poll().doc); + } + + return groundTruthVectorsIds; + } +} From c65c4679bb42f8b33018ee03025b03d7ee54c151 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 15:20:24 +0000 Subject: [PATCH 03/86] Fix license headers --- .../jvector/ForceMergesOnlyMergePolicy.java | 16 ++++++++++++++-- .../codecs/jvector/GraphNodeIdToDocMap.java | 16 ++++++++++++++-- .../codecs/jvector/JVectorFloatVectorValues.java | 16 ++++++++++++++-- 
.../sandbox/codecs/jvector/JVectorFormat.java | 16 ++++++++++++++-- .../codecs/jvector/JVectorIndexWriter.java | 16 ++++++++++++++-- .../codecs/jvector/JVectorKnnCollector.java | 16 ++++++++++++++-- .../jvector/JVectorKnnFloatVectorQuery.java | 16 ++++++++++++++-- .../jvector/JVectorRandomAccessReader.java | 16 ++++++++++++++-- .../sandbox/codecs/jvector/JVectorReader.java | 16 ++++++++++++++-- .../codecs/jvector/JVectorVectorScorer.java | 16 ++++++++++++++-- .../sandbox/codecs/jvector/JVectorWriter.java | 16 ++++++++++++++-- .../sandbox/codecs/jvector/KNNJVectorTests.java | 16 ++++++++++++++-- 12 files changed, 168 insertions(+), 24 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/ForceMergesOnlyMergePolicy.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/ForceMergesOnlyMergePolicy.java index 8357a5fcdb46..71e11ce22d22 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/ForceMergesOnlyMergePolicy.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/ForceMergesOnlyMergePolicy.java @@ -1,6 +1,18 @@ /* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ package org.opensearch.knn.index.codec.jvector; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java index 7fff91e12062..28c19df90b21 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java @@ -1,6 +1,18 @@ /* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ package org.opensearch.knn.index.codec.jvector; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java index ce3008a79c29..7d80fb0f6918 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java @@ -1,6 +1,18 @@ /* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ package org.opensearch.knn.index.codec.jvector; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java index 5d25622d3df6..29cefe6598e2 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java @@ -1,6 +1,18 @@ /* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ package org.opensearch.knn.index.codec.jvector; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java index b01b4c8db1bb..c4cc2f715bec 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java @@ -1,6 +1,18 @@ /* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ package org.opensearch.knn.index.codec.jvector; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java index 573726f5f19a..32b35af7c012 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java @@ -1,6 +1,18 @@ /* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ package org.opensearch.knn.index.codec.jvector; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java index 922a7dcd55b1..1ee729db1543 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java @@ -1,6 +1,18 @@ /* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ package org.opensearch.knn.index.codec.jvector; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java index c3b823010c6d..0599ff2121cb 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java @@ -1,6 +1,18 @@ /* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ package org.opensearch.knn.index.codec.jvector; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 3c8aa4622000..8e36c1c3dda3 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -1,6 +1,18 @@ /* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ package org.opensearch.knn.index.codec.jvector; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java index e27b168b6362..3e0b042dbe2a 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java @@ -1,6 +1,18 @@ /* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ package org.opensearch.knn.index.codec.jvector; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 434e08a6964e..764d4a21a15f 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -1,6 +1,18 @@ /* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. */ package org.opensearch.knn.index.codec.jvector; diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java index 899663214405..b562e52fd4a1 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java @@ -1,6 +1,18 @@ /* - * Copyright OpenSearch Contributors - * SPDX-License-Identifier: Apache-2.0 + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ package org.opensearch.knn.index.codec.jvector; From cf2aa85226ba6bd3d00cf742dc6a2bc54522561c Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 15:21:25 +0000 Subject: [PATCH 04/86] Run tidy --- .../jvector/ForceMergesOnlyMergePolicy.java | 150 +- .../codecs/jvector/GraphNodeIdToDocMap.java | 255 +- .../jvector/JVectorFloatVectorValues.java | 203 +- .../sandbox/codecs/jvector/JVectorFormat.java | 352 +- .../codecs/jvector/JVectorIndexWriter.java | 176 +- .../codecs/jvector/JVectorKnnCollector.java | 87 +- .../jvector/JVectorKnnFloatVectorQuery.java | 122 +- .../jvector/JVectorRandomAccessReader.java | 281 +- .../sandbox/codecs/jvector/JVectorReader.java | 652 ++-- .../codecs/jvector/JVectorVectorScorer.java | 43 +- .../sandbox/codecs/jvector/JVectorWriter.java | 1996 ++++++----- .../codecs/jvector/KNNJVectorTests.java | 3067 +++++++++-------- 12 files changed, 3810 insertions(+), 3574 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/ForceMergesOnlyMergePolicy.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/ForceMergesOnlyMergePolicy.java index 71e11ce22d22..d43e7e4ac80f 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/ForceMergesOnlyMergePolicy.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/ForceMergesOnlyMergePolicy.java @@ -17,95 +17,99 @@ package org.opensearch.knn.index.codec.jvector; +import java.io.IOException; +import java.util.List; +import java.util.Map; import org.apache.lucene.index.MergePolicy; import org.apache.lucene.index.MergeTrigger; import org.apache.lucene.index.SegmentCommitInfo; import org.apache.lucene.index.SegmentInfos; -import java.io.IOException; -import java.util.List; -import java.util.Map; - /** - * A merge policy that only merges segments if they are forced. - * This is useful for testing and benchmarking purposes. Since it can be used for benchmarks, it is placed in the common - * codec module. + * A merge policy that only merges segments if they are forced. This is useful for testing and + * benchmarking purposes. Since it can be used for benchmarks, it is placed in the common codec + * module. */ public class ForceMergesOnlyMergePolicy extends MergePolicy { - private final boolean useCompoundFile; + private final boolean useCompoundFile; - public ForceMergesOnlyMergePolicy() { - this(false); - } - - public ForceMergesOnlyMergePolicy(boolean useCompoundFile) { - super(); - this.useCompoundFile = useCompoundFile; - } + public ForceMergesOnlyMergePolicy() { + this(false); + } - @Override - public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, MergeContext mergeContext) - throws IOException { - return null; - } + public ForceMergesOnlyMergePolicy(boolean useCompoundFile) { + super(); + this.useCompoundFile = useCompoundFile; + } - @Override - public MergeSpecification findForcedMerges( - SegmentInfos segmentInfos, - int maxSegmentCount, - Map segmentsToMerge, - MergeContext mergeContext - ) throws IOException { - // If the segments are already merged (e.g. 
there's only 1 segment), or - // there are segments = segmentInfos.asList(); - MergeSpecification spec = new MergeSpecification(); + @Override + public MergeSpecification findMerges( + MergeTrigger mergeTrigger, SegmentInfos segmentInfos, MergeContext mergeContext) + throws IOException { + return null; + } - final OneMerge merge = new OneMerge(segments); - spec.add(merge); - return spec; + @Override + public MergeSpecification findForcedMerges( + SegmentInfos segmentInfos, + int maxSegmentCount, + Map segmentsToMerge, + MergeContext mergeContext) + throws IOException { + // If the segments are already merged (e.g. there's only 1 segment), or + // there are segments = segmentInfos.asList(); + MergeSpecification spec = new MergeSpecification(); - @Override - public boolean useCompoundFile(SegmentInfos segmentInfos, SegmentCommitInfo newSegment, MergeContext mergeContext) throws IOException { - return useCompoundFile; - } + final OneMerge merge = new OneMerge(segments); + spec.add(merge); + return spec; + } - @Override - public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, MergeContext mergeContext) throws IOException { - return null; - } + @Override + public boolean useCompoundFile( + SegmentInfos segmentInfos, SegmentCommitInfo newSegment, MergeContext mergeContext) + throws IOException { + return useCompoundFile; + } - /** - * Returns true if the number of segments eligible for merging is less than or equal to the - * specified {@code maxNumSegments}. - */ - protected boolean isMerged( - SegmentInfos infos, - int maxNumSegments, - Map segmentsToMerge, - MergeContext mergeContext - ) throws IOException { - final int numSegments = infos.size(); - int numToMerge = 0; - SegmentCommitInfo mergeInfo = null; - boolean segmentIsOriginal = false; - for (int i = 0; i < numSegments && numToMerge <= maxNumSegments; i++) { - final SegmentCommitInfo info = infos.info(i); - final Boolean isOriginal = segmentsToMerge.get(info); - if (isOriginal != null) { - segmentIsOriginal = isOriginal; - numToMerge++; - mergeInfo = info; - } - } + @Override + public MergeSpecification findForcedDeletesMerges( + SegmentInfos segmentInfos, MergeContext mergeContext) throws IOException { + return null; + } - return numToMerge <= maxNumSegments && (numToMerge != 1 || !segmentIsOriginal || isMerged(infos, mergeInfo, mergeContext)); + /** + * Returns true if the number of segments eligible for merging is less than or equal to the + * specified {@code maxNumSegments}. 
+ */ + protected boolean isMerged( + SegmentInfos infos, + int maxNumSegments, + Map segmentsToMerge, + MergeContext mergeContext) + throws IOException { + final int numSegments = infos.size(); + int numToMerge = 0; + SegmentCommitInfo mergeInfo = null; + boolean segmentIsOriginal = false; + for (int i = 0; i < numSegments && numToMerge <= maxNumSegments; i++) { + final SegmentCommitInfo info = infos.info(i); + final Boolean isOriginal = segmentsToMerge.get(info); + if (isOriginal != null) { + segmentIsOriginal = isOriginal; + numToMerge++; + mergeInfo = info; + } } + + return numToMerge <= maxNumSegments + && (numToMerge != 1 || !segmentIsOriginal || isMerged(infos, mergeInfo, mergeContext)); + } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java index 28c19df90b21..ce6050088d68 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java @@ -17,146 +17,157 @@ package org.opensearch.knn.index.codec.jvector; +import java.io.IOException; +import java.util.Arrays; import lombok.extern.log4j.Log4j2; import org.apache.lucene.index.Sorter; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; -import java.io.IOException; -import java.util.Arrays; - /** - * This class represents the mapping from the Lucene document IDs to the jVector ordinals. - * This mapping is necessary because the jVector ordinals can be different from the Lucene document IDs and when lucene documentIDs change after a merge, - * we need to update this mapping to reflect the new document IDs. - * This requires us to know the previous mapping from the previous merge and the new mapping from the current merge. - *
- * Which means that we also need to persist this mapping to disk to be available across merges. + * This class represents the mapping from the Lucene document IDs to the jVector ordinals. This + * mapping is necessary because the jVector ordinals can be different from the Lucene document IDs + * and when lucene documentIDs change after a merge, we need to update this mapping to reflect the + * new document IDs. This requires us to know the previous mapping from the previous merge and the + * new mapping from the current merge. + * + *
Which means that we also need to persist this mapping to disk to be available across merges. */ @Log4j2 public class GraphNodeIdToDocMap { - private static final int VERSION = 1; - private int[] graphNodeIdsToDocIds; - private int[] docIdsToGraphNodeIds; - - /** - * Constructor that reads the mapping from the index input - * - * @param in The index input - * @throws IOException if an I/O error occurs - */ - public GraphNodeIdToDocMap(IndexInput in) throws IOException { - final int version = in.readInt(); // Read the version - if (version != VERSION) { - throw new IOException("Unsupported version: " + version); - } - int size = in.readVInt(); - int maxDocId = in.readVInt(); + private static final int VERSION = 1; + private int[] graphNodeIdsToDocIds; + private int[] docIdsToGraphNodeIds; - graphNodeIdsToDocIds = new int[size]; - docIdsToGraphNodeIds = new int[maxDocId]; - for (int ord = 0; ord < size; ord++) { - final int docId = in.readVInt(); - graphNodeIdsToDocIds[ord] = docId; - docIdsToGraphNodeIds[docId] = ord; - } + /** + * Constructor that reads the mapping from the index input + * + * @param in The index input + * @throws IOException if an I/O error occurs + */ + public GraphNodeIdToDocMap(IndexInput in) throws IOException { + final int version = in.readInt(); // Read the version + if (version != VERSION) { + throw new IOException("Unsupported version: " + version); } + int size = in.readVInt(); + int maxDocId = in.readVInt(); - /** - * Constructor that creates a new mapping between ordinals and docIds - * - * @param graphNodeIdsToDocIds The mapping from ordinals to docIds - */ - public GraphNodeIdToDocMap(int[] graphNodeIdsToDocIds) { - if (graphNodeIdsToDocIds.length == 0) { - this.graphNodeIdsToDocIds = new int[0]; - this.docIdsToGraphNodeIds = new int[0]; - return; - } - this.graphNodeIdsToDocIds = new int[graphNodeIdsToDocIds.length]; - System.arraycopy(graphNodeIdsToDocIds, 0, this.graphNodeIdsToDocIds, 0, graphNodeIdsToDocIds.length); - final int maxDocId = Arrays.stream(graphNodeIdsToDocIds).max().getAsInt(); - final int maxDocs = maxDocId + 1; - // We are going to assume that the number of ordinals is roughly the same as the number of documents in the segment, therefore, - // the mapping will not be sparse. - if (maxDocs < graphNodeIdsToDocIds.length) { - throw new IllegalStateException("Max docs " + maxDocs + " is less than the number of ordinals " + graphNodeIdsToDocIds.length); - } - if (maxDocId > graphNodeIdsToDocIds.length) { - log.warn( - "Max doc id {} is greater than the number of ordinals {}, this implies a lot of deleted documents. Or that some documents are missing vectors. Wasting a lot of memory", - maxDocId, - graphNodeIdsToDocIds.length - ); - } - this.docIdsToGraphNodeIds = new int[maxDocs]; - Arrays.fill(this.docIdsToGraphNodeIds, -1); // -1 means no mapping to ordinal - for (int ord = 0; ord < graphNodeIdsToDocIds.length; ord++) { - this.docIdsToGraphNodeIds[graphNodeIdsToDocIds[ord]] = ord; - } + graphNodeIdsToDocIds = new int[size]; + docIdsToGraphNodeIds = new int[maxDocId]; + for (int ord = 0; ord < size; ord++) { + final int docId = in.readVInt(); + graphNodeIdsToDocIds[ord] = docId; + docIdsToGraphNodeIds[docId] = ord; } + } - /** - * Updates the mapping from the Lucene document IDs to the jVector ordinals based on the sort operation. 
(during flush) - * - * @param sortMap The sort map - */ - public void update(Sorter.DocMap sortMap) { - final int[] newGraphNodeIdsToDocIds = new int[graphNodeIdsToDocIds.length]; - final int maxNewDocId = Arrays.stream(graphNodeIdsToDocIds).map(sortMap::oldToNew).max().getAsInt(); - final int maxDocs = maxNewDocId + 1; - if (maxDocs < graphNodeIdsToDocIds.length) { - throw new IllegalStateException("Max docs " + maxDocs + " is less than the number of ordinals " + graphNodeIdsToDocIds.length); - } - final int[] newDocIdsToOrdinals = new int[maxDocs]; - Arrays.fill(newDocIdsToOrdinals, -1); - for (int oldDocId = 0; oldDocId < docIdsToGraphNodeIds.length; oldDocId++) { - if (docIdsToGraphNodeIds[oldDocId] == -1) { - continue; - } - final int newDocId = sortMap.oldToNew(oldDocId); - final int oldOrd = docIdsToGraphNodeIds[oldDocId]; - newDocIdsToOrdinals[newDocId] = oldOrd; - newGraphNodeIdsToDocIds[oldOrd] = newDocId; - } - this.docIdsToGraphNodeIds = newDocIdsToOrdinals; - this.graphNodeIdsToDocIds = newGraphNodeIdsToDocIds; + /** + * Constructor that creates a new mapping between ordinals and docIds + * + * @param graphNodeIdsToDocIds The mapping from ordinals to docIds + */ + public GraphNodeIdToDocMap(int[] graphNodeIdsToDocIds) { + if (graphNodeIdsToDocIds.length == 0) { + this.graphNodeIdsToDocIds = new int[0]; + this.docIdsToGraphNodeIds = new int[0]; + return; } - - /** - * Returns the jVector node id for the given Lucene document ID - * - * @param luceneDocId The Lucene document ID - * @return The jVector ordinal - */ - public int getJVectorNodeId(int luceneDocId) { - return docIdsToGraphNodeIds[luceneDocId]; + this.graphNodeIdsToDocIds = new int[graphNodeIdsToDocIds.length]; + System.arraycopy( + graphNodeIdsToDocIds, 0, this.graphNodeIdsToDocIds, 0, graphNodeIdsToDocIds.length); + final int maxDocId = Arrays.stream(graphNodeIdsToDocIds).max().getAsInt(); + final int maxDocs = maxDocId + 1; + // We are going to assume that the number of ordinals is roughly the same as the number of + // documents in the segment, therefore, + // the mapping will not be sparse. + if (maxDocs < graphNodeIdsToDocIds.length) { + throw new IllegalStateException( + "Max docs " + + maxDocs + + " is less than the number of ordinals " + + graphNodeIdsToDocIds.length); + } + if (maxDocId > graphNodeIdsToDocIds.length) { + log.warn( + "Max doc id {} is greater than the number of ordinals {}, this implies a lot of deleted documents. Or that some documents are missing vectors. Wasting a lot of memory", + maxDocId, + graphNodeIdsToDocIds.length); + } + this.docIdsToGraphNodeIds = new int[maxDocs]; + Arrays.fill(this.docIdsToGraphNodeIds, -1); // -1 means no mapping to ordinal + for (int ord = 0; ord < graphNodeIdsToDocIds.length; ord++) { + this.docIdsToGraphNodeIds[graphNodeIdsToDocIds[ord]] = ord; } + } - /** - * Returns the Lucene document ID for the given jVector node id - * - * @param graphNodeId The jVector ordinal - * @return The Lucene document ID - *
- * NOTE: This method is useful when, for example, we want to remap acceptedDocs bitmap from Lucene to jVector ordinal bitmap filter - */ - public int getLuceneDocId(int graphNodeId) { - return graphNodeIdsToDocIds[graphNodeId]; + /** + * Updates the mapping from the Lucene document IDs to the jVector ordinals based on the sort + * operation. (during flush) + * + * @param sortMap The sort map + */ + public void update(Sorter.DocMap sortMap) { + final int[] newGraphNodeIdsToDocIds = new int[graphNodeIdsToDocIds.length]; + final int maxNewDocId = + Arrays.stream(graphNodeIdsToDocIds).map(sortMap::oldToNew).max().getAsInt(); + final int maxDocs = maxNewDocId + 1; + if (maxDocs < graphNodeIdsToDocIds.length) { + throw new IllegalStateException( + "Max docs " + + maxDocs + + " is less than the number of ordinals " + + graphNodeIdsToDocIds.length); } + final int[] newDocIdsToOrdinals = new int[maxDocs]; + Arrays.fill(newDocIdsToOrdinals, -1); + for (int oldDocId = 0; oldDocId < docIdsToGraphNodeIds.length; oldDocId++) { + if (docIdsToGraphNodeIds[oldDocId] == -1) { + continue; + } + final int newDocId = sortMap.oldToNew(oldDocId); + final int oldOrd = docIdsToGraphNodeIds[oldDocId]; + newDocIdsToOrdinals[newDocId] = oldOrd; + newGraphNodeIdsToDocIds[oldOrd] = newDocId; + } + this.docIdsToGraphNodeIds = newDocIdsToOrdinals; + this.graphNodeIdsToDocIds = newGraphNodeIdsToDocIds; + } + + /** + * Returns the jVector node id for the given Lucene document ID + * + * @param luceneDocId The Lucene document ID + * @return The jVector ordinal + */ + public int getJVectorNodeId(int luceneDocId) { + return docIdsToGraphNodeIds[luceneDocId]; + } + + /** + * Returns the Lucene document ID for the given jVector node id + * + * @param graphNodeId The jVector ordinal + * @return The Lucene document ID + *
NOTE: This method is useful when, for example, we want to remap acceptedDocs bitmap from + * Lucene to jVector ordinal bitmap filter + */ + public int getLuceneDocId(int graphNodeId) { + return graphNodeIdsToDocIds[graphNodeId]; + } - /** - * Writes the mapping to the index output - * - * @param out The index output - * @throws IOException if an I/O error occurs - */ - public void toOutput(IndexOutput out) throws IOException { - out.writeInt(VERSION); - out.writeVInt(graphNodeIdsToDocIds.length); - out.writeVInt(docIdsToGraphNodeIds.length); - for (int ord = 0; ord < graphNodeIdsToDocIds.length; ord++) { - out.writeVInt(graphNodeIdsToDocIds[ord]); - } + /** + * Writes the mapping to the index output + * + * @param out The index output + * @throws IOException if an I/O error occurs + */ + public void toOutput(IndexOutput out) throws IOException { + out.writeInt(VERSION); + out.writeVInt(graphNodeIdsToDocIds.length); + out.writeVInt(docIdsToGraphNodeIds.length); + for (int ord = 0; ord < graphNodeIdsToDocIds.length; ord++) { + out.writeVInt(graphNodeIdsToDocIds[ord]); } + } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java index 7d80fb0f6918..5dba75410ac0 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java @@ -23,111 +23,112 @@ import io.github.jbellis.jvector.vector.VectorizationProvider; import io.github.jbellis.jvector.vector.types.VectorFloat; import io.github.jbellis.jvector.vector.types.VectorTypeSupport; +import java.io.IOException; import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.search.VectorScorer; -import java.io.IOException; - public class JVectorFloatVectorValues extends FloatVectorValues { - private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport(); - - private final OnDiskGraphIndex.View view; - private final VectorSimilarityFunction similarityFunction; - private final int dimension; - private final int size; - private final GraphNodeIdToDocMap graphNodeIdToDocMap; - - public JVectorFloatVectorValues( - OnDiskGraphIndex onDiskGraphIndex, - VectorSimilarityFunction similarityFunction, - GraphNodeIdToDocMap graphNodeIdToDocMap - ) throws IOException { - this.view = onDiskGraphIndex.getView(); - this.dimension = view.dimension(); - this.size = view.size(); - this.similarityFunction = similarityFunction; - this.graphNodeIdToDocMap = graphNodeIdToDocMap; - } - - @Override - public int dimension() { - return dimension; - } - - @Override - public int size() { - return size; - } - - // This allows us to access the vector without copying it to float[] - public VectorFloat vectorFloatValue(int ord) { - return view.getVector(ord); - } - - public DocIndexIterator iterator() { - return new DocIndexIterator() { - private int docId = -1; - private final Bits liveNodes = view.liveNodes(); - - @Override - public long cost() { - return size(); - } - - @Override - public int index() { - return graphNodeIdToDocMap.getJVectorNodeId(docId); - } - - @Override - public int docID() { - return docId; - } - - @Override - public int nextDoc() throws IOException { - // Advance to the next node docId starts from -1 which is why we need to increment docId by 1 "size" - // times - while (docId < size - 1) { - 
docId++; - if (liveNodes.get(docId)) { - return docId; - } - } - docId = NO_MORE_DOCS; - - return docId; - } - - @Override - public int advance(int target) throws IOException { - return slowAdvance(target); - } - }; - } - - @Override - public float[] vectorValue(int i) throws IOException { - try { - final VectorFloat vector = vectorFloatValue(i); - return (float[]) vector.get(); - } catch (Throwable e) { - throw new RuntimeException(e); + private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = + VectorizationProvider.getInstance().getVectorTypeSupport(); + + private final OnDiskGraphIndex.View view; + private final VectorSimilarityFunction similarityFunction; + private final int dimension; + private final int size; + private final GraphNodeIdToDocMap graphNodeIdToDocMap; + + public JVectorFloatVectorValues( + OnDiskGraphIndex onDiskGraphIndex, + VectorSimilarityFunction similarityFunction, + GraphNodeIdToDocMap graphNodeIdToDocMap) + throws IOException { + this.view = onDiskGraphIndex.getView(); + this.dimension = view.dimension(); + this.size = view.size(); + this.similarityFunction = similarityFunction; + this.graphNodeIdToDocMap = graphNodeIdToDocMap; + } + + @Override + public int dimension() { + return dimension; + } + + @Override + public int size() { + return size; + } + + // This allows us to access the vector without copying it to float[] + public VectorFloat vectorFloatValue(int ord) { + return view.getVector(ord); + } + + public DocIndexIterator iterator() { + return new DocIndexIterator() { + private int docId = -1; + private final Bits liveNodes = view.liveNodes(); + + @Override + public long cost() { + return size(); + } + + @Override + public int index() { + return graphNodeIdToDocMap.getJVectorNodeId(docId); + } + + @Override + public int docID() { + return docId; + } + + @Override + public int nextDoc() throws IOException { + // Advance to the next node docId starts from -1 which is why we need to increment docId by + // 1 "size" + // times + while (docId < size - 1) { + docId++; + if (liveNodes.get(docId)) { + return docId; + } } + docId = NO_MORE_DOCS; + + return docId; + } + + @Override + public int advance(int target) throws IOException { + return slowAdvance(target); + } + }; + } + + @Override + public float[] vectorValue(int i) throws IOException { + try { + final VectorFloat vector = vectorFloatValue(i); + return (float[]) vector.get(); + } catch (Throwable e) { + throw new RuntimeException(e); } - - public VectorFloat vectorValueObject(int i) throws IOException { - return vectorFloatValue(i); - } - - @Override - public FloatVectorValues copy() throws IOException { - return this; - } - - @Override - public VectorScorer scorer(float[] query) throws IOException { - return new JVectorVectorScorer(this, VECTOR_TYPE_SUPPORT.createFloatVector(query), similarityFunction); - } - + } + + public VectorFloat vectorValueObject(int i) throws IOException { + return vectorFloatValue(i); + } + + @Override + public FloatVectorValues copy() throws IOException { + return this; + } + + @Override + public VectorScorer scorer(float[] query) throws IOException { + return new JVectorVectorScorer( + this, VECTOR_TYPE_SUPPORT.createFloatVector(query), similarityFunction); + } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java index 29cefe6598e2..a7f3a13ee865 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java 
+++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java @@ -17,6 +17,10 @@ package org.opensearch.knn.index.codec.jvector; +import java.io.IOException; +import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.ForkJoinWorkerThread; +import java.util.function.Function; import lombok.extern.log4j.Log4j2; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; @@ -25,184 +29,186 @@ import org.apache.lucene.index.SegmentWriteState; import org.opensearch.knn.common.KNNConstants; -import java.io.IOException; -import java.util.concurrent.ForkJoinPool; -import java.util.concurrent.ForkJoinWorkerThread; -import java.util.function.Function; - @Log4j2 public class JVectorFormat extends KnnVectorsFormat { - public static final String NAME = "JVectorFormat"; - public static final String META_CODEC_NAME = "JVectorVectorsFormatMeta"; - public static final String VECTOR_INDEX_CODEC_NAME = "JVectorVectorsFormatIndex"; - public static final String NEIGHBORS_SCORE_CACHE_CODEC_NAME = "JVectorVectorsFormatNeighborsScoreCache"; - public static final String JVECTOR_FILES_SUFFIX = "jvector"; - public static final String META_EXTENSION = "meta-" + JVECTOR_FILES_SUFFIX; - public static final String VECTOR_INDEX_EXTENSION = "data-" + JVECTOR_FILES_SUFFIX; - public static final String NEIGHBORS_SCORE_CACHE_EXTENSION = "neighbors-score-cache-" + JVECTOR_FILES_SUFFIX; - - public static final int VERSION_START = 0; - public static final int VERSION_CURRENT = VERSION_START; - public static final int DEFAULT_MAX_CONN = 32; - public static final int DEFAULT_BEAM_WIDTH = 100; - // Unfortunately, this can't be managed yet by the OpenSearch ThreadPool because it's not supporting {@link ForkJoinPool} types - public static final ForkJoinPool SIMD_POOL_MERGE = getPhysicalCoreExecutor(); - public static final ForkJoinPool SIMD_POOL_FLUSH = getPhysicalCoreExecutor(); - - private final int maxConn; - private final int beamWidth; - private final Function numberOfSubspacesPerVectorSupplier; // as a function of the original dimension - private final int minBatchSizeForQuantization; - private final float alpha; - private final float neighborOverflow; - private final boolean hierarchyEnabled; - - public JVectorFormat() { - this( - NAME, - DEFAULT_MAX_CONN, - DEFAULT_BEAM_WIDTH, - KNNConstants.DEFAULT_NEIGHBOR_OVERFLOW_VALUE.floatValue(), - KNNConstants.DEFAULT_ALPHA_VALUE.floatValue(), - JVectorFormat::getDefaultNumberOfSubspacesPerVector, - KNNConstants.DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION, - KNNConstants.DEFAULT_HIERARCHY_ENABLED - ); - } - - public JVectorFormat(int minBatchSizeForQuantization) { - this( - NAME, - DEFAULT_MAX_CONN, - DEFAULT_BEAM_WIDTH, - KNNConstants.DEFAULT_NEIGHBOR_OVERFLOW_VALUE.floatValue(), - KNNConstants.DEFAULT_ALPHA_VALUE.floatValue(), - JVectorFormat::getDefaultNumberOfSubspacesPerVector, - minBatchSizeForQuantization, - KNNConstants.DEFAULT_HIERARCHY_ENABLED - ); + public static final String NAME = "JVectorFormat"; + public static final String META_CODEC_NAME = "JVectorVectorsFormatMeta"; + public static final String VECTOR_INDEX_CODEC_NAME = "JVectorVectorsFormatIndex"; + public static final String NEIGHBORS_SCORE_CACHE_CODEC_NAME = + "JVectorVectorsFormatNeighborsScoreCache"; + public static final String JVECTOR_FILES_SUFFIX = "jvector"; + public static final String META_EXTENSION = "meta-" + JVECTOR_FILES_SUFFIX; + public static final String VECTOR_INDEX_EXTENSION = "data-" + JVECTOR_FILES_SUFFIX; + public 
static final String NEIGHBORS_SCORE_CACHE_EXTENSION = + "neighbors-score-cache-" + JVECTOR_FILES_SUFFIX; + + public static final int VERSION_START = 0; + public static final int VERSION_CURRENT = VERSION_START; + public static final int DEFAULT_MAX_CONN = 32; + public static final int DEFAULT_BEAM_WIDTH = 100; + // Unfortunately, this can't be managed yet by the OpenSearch ThreadPool because it's not + // supporting {@link ForkJoinPool} types + public static final ForkJoinPool SIMD_POOL_MERGE = getPhysicalCoreExecutor(); + public static final ForkJoinPool SIMD_POOL_FLUSH = getPhysicalCoreExecutor(); + + private final int maxConn; + private final int beamWidth; + private final Function + numberOfSubspacesPerVectorSupplier; // as a function of the original dimension + private final int minBatchSizeForQuantization; + private final float alpha; + private final float neighborOverflow; + private final boolean hierarchyEnabled; + + public JVectorFormat() { + this( + NAME, + DEFAULT_MAX_CONN, + DEFAULT_BEAM_WIDTH, + KNNConstants.DEFAULT_NEIGHBOR_OVERFLOW_VALUE.floatValue(), + KNNConstants.DEFAULT_ALPHA_VALUE.floatValue(), + JVectorFormat::getDefaultNumberOfSubspacesPerVector, + KNNConstants.DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION, + KNNConstants.DEFAULT_HIERARCHY_ENABLED); + } + + public JVectorFormat(int minBatchSizeForQuantization) { + this( + NAME, + DEFAULT_MAX_CONN, + DEFAULT_BEAM_WIDTH, + KNNConstants.DEFAULT_NEIGHBOR_OVERFLOW_VALUE.floatValue(), + KNNConstants.DEFAULT_ALPHA_VALUE.floatValue(), + JVectorFormat::getDefaultNumberOfSubspacesPerVector, + minBatchSizeForQuantization, + KNNConstants.DEFAULT_HIERARCHY_ENABLED); + } + + public JVectorFormat( + int maxConn, + int beamWidth, + float neighborOverflow, + float alpha, + Function numberOfSubspacesPerVectorSupplier, + int minBatchSizeForQuantization, + boolean hierarchyEnabled) { + this( + NAME, + maxConn, + beamWidth, + neighborOverflow, + alpha, + numberOfSubspacesPerVectorSupplier, + minBatchSizeForQuantization, + hierarchyEnabled); + } + + public JVectorFormat( + String name, + int maxConn, + int beamWidth, + float neighborOverflow, + float alpha, + Function numberOfSubspacesPerVectorSupplier, + int minBatchSizeForQuantization, + boolean hierarchyEnabled) { + super(name); + this.maxConn = maxConn; + this.beamWidth = beamWidth; + this.numberOfSubspacesPerVectorSupplier = numberOfSubspacesPerVectorSupplier; + this.minBatchSizeForQuantization = minBatchSizeForQuantization; + this.alpha = alpha; + this.neighborOverflow = neighborOverflow; + this.hierarchyEnabled = hierarchyEnabled; + } + + @Override + public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { + return new JVectorWriter( + state, + maxConn, + beamWidth, + neighborOverflow, + alpha, + numberOfSubspacesPerVectorSupplier, + minBatchSizeForQuantization, + hierarchyEnabled); + } + + @Override + public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { + return new JVectorReader(state); + } + + @Override + public int getMaxDimensions(String s) { + // Not a hard limit, but a reasonable default + return 8192; + } + + /** + * This method returns the default number of subspaces per vector for a given original dimension. + * Should be used as a default value for the number of subspaces per vector in case no value is + * provided. 
+ * + * @param originalDimension original vector dimension + * @return default number of subspaces per vector + */ + public static int getDefaultNumberOfSubspacesPerVector(int originalDimension) { + // the idea here is that higher dimensions compress well, but not so well that we should use + // fewer bits + // than a lower-dimension vector, which is what you could get with cutoff points to switch + // between (e.g.) + // D*0.5 and D*0.25. Thus, the following ensures that bytes per vector is strictly increasing + // with D. + int compressedBytes; + if (originalDimension <= 32) { + // We are compressing from 4-byte floats to single-byte codebook indexes, + // so this represents compression of 4x + // * GloVe-25 needs 25 BPV to achieve good recall + compressedBytes = originalDimension; + } else if (originalDimension <= 64) { + // * GloVe-50 performs fine at 25 + compressedBytes = 32; + } else if (originalDimension <= 200) { + // * GloVe-100 and -200 perform well at 50 and 100 BPV, respectively + compressedBytes = (int) (originalDimension * 0.5); + } else if (originalDimension <= 400) { + // * NYTimes-256 actually performs fine at 64 BPV but we'll be conservative + // since we don't want BPV to decrease + compressedBytes = 100; + } else if (originalDimension <= 768) { + // allow BPV to increase linearly up to 192 + compressedBytes = (int) (originalDimension * 0.25); + } else if (originalDimension <= 1536) { + // * ada002 vectors have good recall even at 192 BPV = compression of 32x + compressedBytes = 192; + } else { + // We have not tested recall with larger vectors than this, let's let it increase linearly + compressedBytes = (int) (originalDimension * 0.125); } + return compressedBytes; + } - public JVectorFormat( - int maxConn, - int beamWidth, - float neighborOverflow, - float alpha, - Function numberOfSubspacesPerVectorSupplier, - int minBatchSizeForQuantization, - boolean hierarchyEnabled - ) { - this( - NAME, - maxConn, - beamWidth, - neighborOverflow, - alpha, - numberOfSubspacesPerVectorSupplier, - minBatchSizeForQuantization, - hierarchyEnabled - ); - } - - public JVectorFormat( - String name, - int maxConn, - int beamWidth, - float neighborOverflow, - float alpha, - Function numberOfSubspacesPerVectorSupplier, - int minBatchSizeForQuantization, - boolean hierarchyEnabled - ) { - super(name); - this.maxConn = maxConn; - this.beamWidth = beamWidth; - this.numberOfSubspacesPerVectorSupplier = numberOfSubspacesPerVectorSupplier; - this.minBatchSizeForQuantization = minBatchSizeForQuantization; - this.alpha = alpha; - this.neighborOverflow = neighborOverflow; - this.hierarchyEnabled = hierarchyEnabled; - } - - @Override - public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { - return new JVectorWriter( - state, - maxConn, - beamWidth, - neighborOverflow, - alpha, - numberOfSubspacesPerVectorSupplier, - minBatchSizeForQuantization, - hierarchyEnabled - ); - } - - @Override - public KnnVectorsReader fieldsReader(SegmentReadState state) throws IOException { - return new JVectorReader(state); - } - - @Override - public int getMaxDimensions(String s) { - // Not a hard limit, but a reasonable default - return 8192; - } - - /** - * This method returns the default number of subspaces per vector for a given original dimension. - * Should be used as a default value for the number of subspaces per vector in case no value is provided. 
- * - * @param originalDimension original vector dimension - * @return default number of subspaces per vector - */ - public static int getDefaultNumberOfSubspacesPerVector(int originalDimension) { - // the idea here is that higher dimensions compress well, but not so well that we should use fewer bits - // than a lower-dimension vector, which is what you could get with cutoff points to switch between (e.g.) - // D*0.5 and D*0.25. Thus, the following ensures that bytes per vector is strictly increasing with D. - int compressedBytes; - if (originalDimension <= 32) { - // We are compressing from 4-byte floats to single-byte codebook indexes, - // so this represents compression of 4x - // * GloVe-25 needs 25 BPV to achieve good recall - compressedBytes = originalDimension; - } else if (originalDimension <= 64) { - // * GloVe-50 performs fine at 25 - compressedBytes = 32; - } else if (originalDimension <= 200) { - // * GloVe-100 and -200 perform well at 50 and 100 BPV, respectively - compressedBytes = (int) (originalDimension * 0.5); - } else if (originalDimension <= 400) { - // * NYTimes-256 actually performs fine at 64 BPV but we'll be conservative - // since we don't want BPV to decrease - compressedBytes = 100; - } else if (originalDimension <= 768) { - // allow BPV to increase linearly up to 192 - compressedBytes = (int) (originalDimension * 0.25); - } else if (originalDimension <= 1536) { - // * ada002 vectors have good recall even at 192 BPV = compression of 32x - compressedBytes = 192; - } else { - // We have not tested recall with larger vectors than this, let's let it increase linearly - compressedBytes = (int) (originalDimension * 0.125); - } - return compressedBytes; - } - - public static ForkJoinPool getPhysicalCoreExecutor() { - final int estimatedPhysicalCoreCount = Integer.getInteger( + public static ForkJoinPool getPhysicalCoreExecutor() { + final int estimatedPhysicalCoreCount = + Integer.getInteger( "jvector.physical_core_count", - Math.max(1, Runtime.getRuntime().availableProcessors() / 2) - ); - assert estimatedPhysicalCoreCount > 0 && estimatedPhysicalCoreCount <= Runtime.getRuntime().availableProcessors() - : "Invalid core count: " + estimatedPhysicalCoreCount; - final ForkJoinPool.ForkJoinWorkerThreadFactory factory = pool -> { - ForkJoinWorkerThread thread = ForkJoinPool.defaultForkJoinWorkerThreadFactory.newThread(pool); - thread.setPriority(Thread.NORM_PRIORITY - 2); - return thread; + Math.max(1, Runtime.getRuntime().availableProcessors() / 2)); + assert estimatedPhysicalCoreCount > 0 + && estimatedPhysicalCoreCount <= Runtime.getRuntime().availableProcessors() + : "Invalid core count: " + estimatedPhysicalCoreCount; + final ForkJoinPool.ForkJoinWorkerThreadFactory factory = + pool -> { + ForkJoinWorkerThread thread = + ForkJoinPool.defaultForkJoinWorkerThreadFactory.newThread(pool); + thread.setPriority(Thread.NORM_PRIORITY - 2); + return thread; }; - log.info("Creating SIMD ForkJoinPool with {} physical cores for JVector SIMD operations", estimatedPhysicalCoreCount); - return new ForkJoinPool(estimatedPhysicalCoreCount, factory, null, true); - } + log.info( + "Creating SIMD ForkJoinPool with {} physical cores for JVector SIMD operations", + estimatedPhysicalCoreCount); + return new ForkJoinPool(estimatedPhysicalCoreCount, factory, null, true); + } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java index 
c4cc2f715bec..3a99635582a7 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java @@ -18,100 +18,102 @@ package org.opensearch.knn.index.codec.jvector; import io.github.jbellis.jvector.disk.IndexWriter; +import java.io.IOException; import lombok.extern.log4j.Log4j2; import org.apache.lucene.store.IndexOutput; -import java.io.IOException; - /** * JVectorRandomAccessWriter is a wrapper around IndexOutput that implements RandomAccessWriter. * Note: This is not thread safe! */ @Log4j2 public class JVectorIndexWriter implements IndexWriter { - private final IndexOutput indexOutputDelegate; - - public JVectorIndexWriter(IndexOutput indexOutputDelegate) { - this.indexOutputDelegate = indexOutputDelegate; - } - - @Override - public long position() throws IOException { - return indexOutputDelegate.getFilePointer(); - } - - @Override - public void close() throws IOException { - indexOutputDelegate.close(); - } - - @Override - public void write(int b) throws IOException { - indexOutputDelegate.writeByte((byte) b); - } - - @Override - public void write(byte[] b) throws IOException { - indexOutputDelegate.writeBytes(b, 0, b.length); - } - - @Override - public void write(byte[] b, int off, int len) throws IOException { - indexOutputDelegate.writeBytes(b, off, len); - } - - @Override - public void writeBoolean(boolean v) throws IOException { - indexOutputDelegate.writeByte((byte) (v ? 1 : 0)); - } - - @Override - public void writeByte(int v) throws IOException { - indexOutputDelegate.writeByte((byte) v); - } - - @Override - public void writeShort(int v) throws IOException { - indexOutputDelegate.writeShort((short) v); - } - - @Override - public void writeChar(int v) throws IOException { - throw new UnsupportedOperationException("JVectorRandomAccessWriter does not support writing chars"); - } - - @Override - public void writeInt(int v) throws IOException { - indexOutputDelegate.writeInt(v); - } - - @Override - public void writeLong(long v) throws IOException { - indexOutputDelegate.writeLong(v); - } - - @Override - public void writeFloat(float v) throws IOException { - indexOutputDelegate.writeInt(Float.floatToIntBits(v)); - } - - @Override - public void writeDouble(double v) throws IOException { - writeLong(Double.doubleToLongBits(v)); - } - - @Override - public void writeBytes(String s) throws IOException { - throw new UnsupportedOperationException("JVectorIndexWriter does not support writing String as bytes"); - } - - @Override - public void writeChars(String s) throws IOException { - throw new UnsupportedOperationException("JVectorIndexWriter does not support writing chars"); - } - - @Override - public void writeUTF(String s) throws IOException { - throw new UnsupportedOperationException("JVectorIndexWriter does not support writing UTF strings"); - } + private final IndexOutput indexOutputDelegate; + + public JVectorIndexWriter(IndexOutput indexOutputDelegate) { + this.indexOutputDelegate = indexOutputDelegate; + } + + @Override + public long position() throws IOException { + return indexOutputDelegate.getFilePointer(); + } + + @Override + public void close() throws IOException { + indexOutputDelegate.close(); + } + + @Override + public void write(int b) throws IOException { + indexOutputDelegate.writeByte((byte) b); + } + + @Override + public void write(byte[] b) throws IOException { + indexOutputDelegate.writeBytes(b, 0, b.length); + } + + @Override + public 
void write(byte[] b, int off, int len) throws IOException { + indexOutputDelegate.writeBytes(b, off, len); + } + + @Override + public void writeBoolean(boolean v) throws IOException { + indexOutputDelegate.writeByte((byte) (v ? 1 : 0)); + } + + @Override + public void writeByte(int v) throws IOException { + indexOutputDelegate.writeByte((byte) v); + } + + @Override + public void writeShort(int v) throws IOException { + indexOutputDelegate.writeShort((short) v); + } + + @Override + public void writeChar(int v) throws IOException { + throw new UnsupportedOperationException( + "JVectorRandomAccessWriter does not support writing chars"); + } + + @Override + public void writeInt(int v) throws IOException { + indexOutputDelegate.writeInt(v); + } + + @Override + public void writeLong(long v) throws IOException { + indexOutputDelegate.writeLong(v); + } + + @Override + public void writeFloat(float v) throws IOException { + indexOutputDelegate.writeInt(Float.floatToIntBits(v)); + } + + @Override + public void writeDouble(double v) throws IOException { + writeLong(Double.doubleToLongBits(v)); + } + + @Override + public void writeBytes(String s) throws IOException { + throw new UnsupportedOperationException( + "JVectorIndexWriter does not support writing String as bytes"); + } + + @Override + public void writeChars(String s) throws IOException { + throw new UnsupportedOperationException("JVectorIndexWriter does not support writing chars"); + } + + @Override + public void writeUTF(String s) throws IOException { + throw new UnsupportedOperationException( + "JVectorIndexWriter does not support writing UTF strings"); + } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java index 32b35af7c012..c5490349ef0a 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java @@ -18,62 +18,63 @@ import lombok.Value; import org.apache.lucene.search.KnnCollector; -import org.apache.lucene.search.knn.KnnSearchStrategy; import org.apache.lucene.search.TopDocs; +import org.apache.lucene.search.knn.KnnSearchStrategy; /** - * Wrapper class for KnnCollector that provides passing of additional parameters specific for JVector. + * Wrapper class for KnnCollector that provides passing of additional parameters specific for + * JVector. 
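For orientation, a hedged sketch that is not part of the diff: JVectorIndexWriter above is a thin adapter from Lucene's IndexOutput to jVector's IndexWriter, so jVector serialization code can write straight into a segment file. The file name and the Directory argument below are placeholders, not names used by this change.

// Hypothetical usage of the adapter; "example.graph" is an illustrative file name.
static long writeSketch(org.apache.lucene.store.Directory directory) throws java.io.IOException {
  try (org.apache.lucene.store.IndexOutput out =
      directory.createOutput("example.graph", org.apache.lucene.store.IOContext.DEFAULT)) {
    io.github.jbellis.jvector.disk.IndexWriter writer = new JVectorIndexWriter(out);
    writer.writeInt(42);       // delegates to IndexOutput.writeInt
    writer.writeFloat(3.14f);  // stored via Float.floatToIntBits
    return writer.position();  // same value as out.getFilePointer()
  }
}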
*/ @Value public class JVectorKnnCollector implements KnnCollector { - KnnCollector delegate; - float threshold; - float rerankFloor; - int overQueryFactor; - boolean usePruning; + KnnCollector delegate; + float threshold; + float rerankFloor; + int overQueryFactor; + boolean usePruning; - @Override - public boolean earlyTerminated() { - return delegate.earlyTerminated(); - } + @Override + public boolean earlyTerminated() { + return delegate.earlyTerminated(); + } - @Override - public void incVisitedCount(int count) { - delegate.incVisitedCount(count); - } + @Override + public void incVisitedCount(int count) { + delegate.incVisitedCount(count); + } - @Override - public long visitedCount() { - return delegate.visitedCount(); - } + @Override + public long visitedCount() { + return delegate.visitedCount(); + } - @Override - public long visitLimit() { - return delegate.visitLimit(); - } + @Override + public long visitLimit() { + return delegate.visitLimit(); + } - @Override - public int k() { - return delegate.k(); - } + @Override + public int k() { + return delegate.k(); + } - @Override - public boolean collect(int docId, float similarity) { - return delegate.collect(docId, similarity); - } + @Override + public boolean collect(int docId, float similarity) { + return delegate.collect(docId, similarity); + } - @Override - public float minCompetitiveSimilarity() { - return delegate.minCompetitiveSimilarity(); - } + @Override + public float minCompetitiveSimilarity() { + return delegate.minCompetitiveSimilarity(); + } - @Override - public TopDocs topDocs() { - return delegate.topDocs(); - } + @Override + public TopDocs topDocs() { + return delegate.topDocs(); + } - @Override - public KnnSearchStrategy getSearchStrategy() { - return delegate.getSearchStrategy(); - } + @Override + public KnnSearchStrategy getSearchStrategy() { + return delegate.getSearchStrategy(); + } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java index 1ee729db1543..d2ece0b9eebc 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java @@ -16,6 +16,7 @@ */ package org.opensearch.knn.index.codec.jvector; +import java.io.IOException; import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.LeafReader; import org.apache.lucene.index.LeafReaderContext; @@ -24,72 +25,71 @@ import org.apache.lucene.search.knn.KnnSearchStrategy; import org.apache.lucene.util.Bits; -import java.io.IOException; - /** - * {@link KnnFloatVectorQuery} that uses jVector to perform the search. - * We use this wrapper simply because we can't pass jVector specific parameters with the upstream {@link KnnFloatVectorQuery}. + * {@link KnnFloatVectorQuery} that uses jVector to perform the search. We use this wrapper simply + * because we can't pass jVector specific parameters with the upstream {@link KnnFloatVectorQuery}. 
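A usage sketch, not part of the diff, for the query class declared just below: it exists so callers can thread the jVector-specific knobs (overQueryFactor, threshold, rerankFloor, usePruning) through an otherwise standard Lucene query. The field name, vector size, and numeric values are placeholders; only the constructor shape comes from this change.

// Illustrative only; searcher is an IndexSearcher opened elsewhere.
static org.apache.lucene.search.TopDocs searchSketch(
    org.apache.lucene.search.IndexSearcher searcher, float[] queryEmbedding)
    throws java.io.IOException {
  org.apache.lucene.search.Query q =
      new JVectorKnnFloatVectorQuery(
          "embedding", queryEmbedding, /* k */ 10,
          /* overQueryFactor */ 5, /* threshold */ 0.0f,
          /* rerankFloor */ 0.0f, /* usePruning */ false);
  return searcher.search(q, 10);
}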
*/ public class JVectorKnnFloatVectorQuery extends KnnFloatVectorQuery { - private static final TopDocs NO_RESULTS = TopDocsCollector.EMPTY_TOPDOCS; - private final int overQueryFactor; - private final float threshold; - private final float rerankFloor; - private final boolean usePruning; + private static final TopDocs NO_RESULTS = TopDocsCollector.EMPTY_TOPDOCS; + private final int overQueryFactor; + private final float threshold; + private final float rerankFloor; + private final boolean usePruning; - public JVectorKnnFloatVectorQuery( - String field, - float[] target, - int k, - int overQueryFactor, - float threshold, - float rerankFloor, - boolean usePruning - ) { - super(field, target, k); - this.overQueryFactor = overQueryFactor; - this.threshold = threshold; - this.rerankFloor = rerankFloor; - this.usePruning = usePruning; - } + public JVectorKnnFloatVectorQuery( + String field, + float[] target, + int k, + int overQueryFactor, + float threshold, + float rerankFloor, + boolean usePruning) { + super(field, target, k); + this.overQueryFactor = overQueryFactor; + this.threshold = threshold; + this.rerankFloor = rerankFloor; + this.usePruning = usePruning; + } - public JVectorKnnFloatVectorQuery( - String field, - float[] target, - int k, - Query filter, - int overQueryFactor, - float threshold, - float rerankFloor, - boolean usePruning - ) { - super(field, target, k, filter); - this.overQueryFactor = overQueryFactor; - this.threshold = threshold; - this.rerankFloor = rerankFloor; - this.usePruning = usePruning; - } + public JVectorKnnFloatVectorQuery( + String field, + float[] target, + int k, + Query filter, + int overQueryFactor, + float threshold, + float rerankFloor, + boolean usePruning) { + super(field, target, k, filter); + this.overQueryFactor = overQueryFactor; + this.threshold = threshold; + this.rerankFloor = rerankFloor; + this.usePruning = usePruning; + } - @Override - protected TopDocs approximateSearch( - LeafReaderContext context, - Bits acceptDocs, - int visitedLimit, - KnnCollectorManager knnCollectorManager - ) throws IOException { - final KnnCollector delegateCollector = knnCollectorManager.newCollector(visitedLimit, KnnSearchStrategy.Hnsw.DEFAULT, context); - final KnnCollector knnCollector = new JVectorKnnCollector(delegateCollector, threshold, rerankFloor, overQueryFactor, usePruning); - LeafReader reader = context.reader(); - FloatVectorValues floatVectorValues = reader.getFloatVectorValues(field); - if (floatVectorValues == null) { - FloatVectorValues.checkField(reader, field); - return NO_RESULTS; - } - if (Math.min(knnCollector.k(), floatVectorValues.size()) == 0) { - return NO_RESULTS; - } - reader.searchNearestVectors(field, getTargetCopy(), knnCollector, acceptDocs); - TopDocs results = knnCollector.topDocs(); - return results != null ? 
results : NO_RESULTS; + @Override + protected TopDocs approximateSearch( + LeafReaderContext context, + Bits acceptDocs, + int visitedLimit, + KnnCollectorManager knnCollectorManager) + throws IOException { + final KnnCollector delegateCollector = + knnCollectorManager.newCollector(visitedLimit, KnnSearchStrategy.Hnsw.DEFAULT, context); + final KnnCollector knnCollector = + new JVectorKnnCollector( + delegateCollector, threshold, rerankFloor, overQueryFactor, usePruning); + LeafReader reader = context.reader(); + FloatVectorValues floatVectorValues = reader.getFloatVectorValues(field); + if (floatVectorValues == null) { + FloatVectorValues.checkField(reader, field); + return NO_RESULTS; + } + if (Math.min(knnCollector.k(), floatVectorValues.size()) == 0) { + return NO_RESULTS; } + reader.searchNearestVectors(field, getTargetCopy(), knnCollector, acceptDocs); + TopDocs results = knnCollector.topDocs(); + return results != null ? results : NO_RESULTS; + } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java index 0599ff2121cb..25f49a897c76 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java @@ -19,168 +19,177 @@ import io.github.jbellis.jvector.disk.RandomAccessReader; import io.github.jbellis.jvector.disk.ReaderSupplier; -import lombok.extern.log4j.Log4j2; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.IOUtils; - import java.io.EOFException; import java.io.IOException; import java.nio.ByteBuffer; import java.nio.FloatBuffer; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; +import lombok.extern.log4j.Log4j2; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.util.IOUtils; @Log4j2 public class JVectorRandomAccessReader implements RandomAccessReader { - private final byte[] internalBuffer = new byte[Long.BYTES]; - private final byte[] internalFloatBuffer = new byte[Float.BYTES]; - private final IndexInput indexInputDelegate; - private volatile boolean closed = false; - - public JVectorRandomAccessReader(IndexInput indexInputDelegate) { - this.indexInputDelegate = indexInputDelegate; - } - - @Override - public void seek(long offset) throws IOException { - indexInputDelegate.seek(offset); - } - - @Override - public long getPosition() throws IOException { - return indexInputDelegate.getFilePointer(); + private final byte[] internalBuffer = new byte[Long.BYTES]; + private final byte[] internalFloatBuffer = new byte[Float.BYTES]; + private final IndexInput indexInputDelegate; + private volatile boolean closed = false; + + public JVectorRandomAccessReader(IndexInput indexInputDelegate) { + this.indexInputDelegate = indexInputDelegate; + } + + @Override + public void seek(long offset) throws IOException { + indexInputDelegate.seek(offset); + } + + @Override + public long getPosition() throws IOException { + return indexInputDelegate.getFilePointer(); + } + + @Override + public int readInt() throws IOException { + return indexInputDelegate.readInt(); + } + + @Override + public float readFloat() throws IOException { + return Float.intBitsToFloat(indexInputDelegate.readInt()); + } + + // TODO: bring back to override when upgrading jVector again + // @Override + public long readLong() throws 
IOException { + return indexInputDelegate.readLong(); + } + + @Override + public void readFully(byte[] bytes) throws IOException { + indexInputDelegate.readBytes(bytes, 0, bytes.length); + } + + @Override + public void readFully(ByteBuffer buffer) throws IOException { + // validate that the requested bytes actually exist ---- + long remainingInFile = indexInputDelegate.length() - indexInputDelegate.getFilePointer(); + if (buffer.remaining() > remainingInFile) { + throw new EOFException( + "Requested " + buffer.remaining() + " bytes but only " + remainingInFile + " available"); } - @Override - public int readInt() throws IOException { - return indexInputDelegate.readInt(); + // Heap buffers with a backing array can be filled in one call ---- + if (buffer.hasArray()) { + int off = buffer.arrayOffset() + buffer.position(); + int len = buffer.remaining(); + indexInputDelegate.readBytes(buffer.array(), off, len); + buffer.position(buffer.limit()); // advance fully + return; } - @Override - public float readFloat() throws IOException { - return Float.intBitsToFloat(indexInputDelegate.readInt()); + // Direct / non-array buffers: copy in reasonable chunks ---- + while (buffer.hasRemaining()) { + final int bytesToRead = Math.min(buffer.remaining(), Long.BYTES); + indexInputDelegate.readBytes(this.internalBuffer, 0, bytesToRead); + buffer.put(this.internalBuffer, 0, bytesToRead); } + } - // TODO: bring back to override when upgrading jVector again - // @Override - public long readLong() throws IOException { - return indexInputDelegate.readLong(); + @Override + public void readFully(long[] vector) throws IOException { + for (int i = 0; i < vector.length; i++) { + vector[i] = readLong(); } + } - @Override - public void readFully(byte[] bytes) throws IOException { - indexInputDelegate.readBytes(bytes, 0, bytes.length); + @Override + public void read(int[] ints, int offset, int count) throws IOException { + for (int i = 0; i < count; i++) { + ints[offset + i] = readInt(); } - - @Override - public void readFully(ByteBuffer buffer) throws IOException { - // validate that the requested bytes actually exist ---- - long remainingInFile = indexInputDelegate.length() - indexInputDelegate.getFilePointer(); - if (buffer.remaining() > remainingInFile) { - throw new EOFException("Requested " + buffer.remaining() + " bytes but only " + remainingInFile + " available"); - } - - // Heap buffers with a backing array can be filled in one call ---- - if (buffer.hasArray()) { - int off = buffer.arrayOffset() + buffer.position(); - int len = buffer.remaining(); - indexInputDelegate.readBytes(buffer.array(), off, len); - buffer.position(buffer.limit()); // advance fully - return; - } - - // Direct / non-array buffers: copy in reasonable chunks ---- - while (buffer.hasRemaining()) { - final int bytesToRead = Math.min(buffer.remaining(), Long.BYTES); - indexInputDelegate.readBytes(this.internalBuffer, 0, bytesToRead); - buffer.put(this.internalBuffer, 0, bytesToRead); - } - } - - @Override - public void readFully(long[] vector) throws IOException { - for (int i = 0; i < vector.length; i++) { - vector[i] = readLong(); - } + } + + @Override + public void read(float[] floats, int offset, int count) throws IOException { + final ByteBuffer byteBuffer = ByteBuffer.allocate(Float.BYTES * count); + indexInputDelegate.readBytes(byteBuffer.array(), offset, Float.BYTES * count); + FloatBuffer buffer = byteBuffer.asFloatBuffer(); + buffer.get(floats, offset, count); + } + + @Override + public void close() throws IOException { + 
log.debug("Closing JVectorRandomAccessReader for file: {}", indexInputDelegate); + this.closed = true; + // no need to really close the index input delegate since it is a clone + log.debug("Closed JVectorRandomAccessReader for file: {}", indexInputDelegate); + } + + @Override + public long length() throws IOException { + return indexInputDelegate.length(); + } + + /** + * Supplies readers which are actually slices of the original IndexInput. We will vend out slices + * in order for us to easily find the footer of the jVector graph index. This is useful because + * our logic that reads the graph that the footer is always at {@link IndexInput#length()} of the + * slice. Which is how {@link + * io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex#load(ReaderSupplier, long)} is working + * behind the scenes. The header offset, on the other hand, is flexible because we can provide it + * as a parameter to {@link + * io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex#load(ReaderSupplier, long)} + */ + public static class Supplier implements ReaderSupplier { + private final AtomicInteger readerCount = new AtomicInteger(0); + private final IndexInput currentInput; + private final long sliceStartOffset; + private final long sliceLength; + private final ConcurrentHashMap readers = + new ConcurrentHashMap<>(); + + public Supplier(IndexInput indexInput) throws IOException { + this( + indexInput, + indexInput.getFilePointer(), + indexInput.length() - indexInput.getFilePointer()); } - @Override - public void read(int[] ints, int offset, int count) throws IOException { - for (int i = 0; i < count; i++) { - ints[offset + i] = readInt(); - } + public Supplier(IndexInput indexInput, long sliceStartOffset, long sliceLength) + throws IOException { + this.currentInput = indexInput; + this.sliceStartOffset = sliceStartOffset; + this.sliceLength = sliceLength; } @Override - public void read(float[] floats, int offset, int count) throws IOException { - final ByteBuffer byteBuffer = ByteBuffer.allocate(Float.BYTES * count); - indexInputDelegate.readBytes(byteBuffer.array(), offset, Float.BYTES * count); - FloatBuffer buffer = byteBuffer.asFloatBuffer(); - buffer.get(floats, offset, count); + public RandomAccessReader get() throws IOException { + synchronized (this) { + final IndexInput input = + currentInput + .slice("Input Slice for the jVector graph or PQ", sliceStartOffset, sliceLength) + .clone(); + + var reader = new JVectorRandomAccessReader(input); + int readerId = readerCount.getAndIncrement(); + readers.put(readerId, reader); + return reader; + } } @Override public void close() throws IOException { - log.debug("Closing JVectorRandomAccessReader for file: {}", indexInputDelegate); - this.closed = true; - // no need to really close the index input delegate since it is a clone - log.debug("Closed JVectorRandomAccessReader for file: {}", indexInputDelegate); - } - - @Override - public long length() throws IOException { - return indexInputDelegate.length(); - } - - /** - * Supplies readers which are actually slices of the original IndexInput. - * We will vend out slices in order for us to easily find the footer of the jVector graph index. - * This is useful because our logic that reads the graph that the footer is always at {@link IndexInput#length()} of the slice. - * Which is how {@link io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex#load(ReaderSupplier, long)} is working behind the scenes. 
- * The header offset, on the other hand, is flexible because we can provide it as a parameter to {@link io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex#load(ReaderSupplier, long)} - */ - public static class Supplier implements ReaderSupplier { - private final AtomicInteger readerCount = new AtomicInteger(0); - private final IndexInput currentInput; - private final long sliceStartOffset; - private final long sliceLength; - private final ConcurrentHashMap readers = new ConcurrentHashMap<>(); - - public Supplier(IndexInput indexInput) throws IOException { - this(indexInput, indexInput.getFilePointer(), indexInput.length() - indexInput.getFilePointer()); - } - - public Supplier(IndexInput indexInput, long sliceStartOffset, long sliceLength) throws IOException { - this.currentInput = indexInput; - this.sliceStartOffset = sliceStartOffset; - this.sliceLength = sliceLength; - } - - @Override - public RandomAccessReader get() throws IOException { - synchronized (this) { - final IndexInput input = currentInput.slice("Input Slice for the jVector graph or PQ", sliceStartOffset, sliceLength) - .clone(); - - var reader = new JVectorRandomAccessReader(input); - int readerId = readerCount.getAndIncrement(); - readers.put(readerId, reader); - return reader; - } - - } - - @Override - public void close() throws IOException { - // Close source of all cloned inputs - IOUtils.closeWhileHandlingException(currentInput); - - // Close all readers - for (RandomAccessReader reader : readers.values()) { - IOUtils.closeWhileHandlingException(reader::close); - } - readers.clear(); - readerCount.set(0); - } + // Close source of all cloned inputs + IOUtils.closeWhileHandlingException(currentInput); + + // Close all readers + for (RandomAccessReader reader : readers.values()) { + IOUtils.closeWhileHandlingException(reader::close); + } + readers.clear(); + readerCount.set(0); } + } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 8e36c1c3dda3..95a98830ff5d 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -27,9 +27,16 @@ import io.github.jbellis.jvector.graph.similarity.SearchScoreProvider; import io.github.jbellis.jvector.quantization.PQVectors; import io.github.jbellis.jvector.quantization.ProductQuantization; +import io.github.jbellis.jvector.vector.VectorSimilarityFunction; import io.github.jbellis.jvector.vector.VectorizationProvider; import io.github.jbellis.jvector.vector.types.VectorFloat; import io.github.jbellis.jvector.vector.types.VectorTypeSupport; +import java.io.Closeable; +import java.io.IOException; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; import lombok.extern.log4j.Log4j2; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnVectorsReader; @@ -38,357 +45,360 @@ import org.apache.lucene.store.*; import org.apache.lucene.util.Bits; import org.apache.lucene.util.IOUtils; -import io.github.jbellis.jvector.vector.VectorSimilarityFunction; import org.opensearch.knn.common.KNNConstants; import org.opensearch.knn.plugin.stats.KNNCounter; -import java.io.Closeable; -import java.io.IOException; - -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Optional; - @Log4j2 public class JVectorReader extends 
KnnVectorsReader { - private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport(); - - private final FieldInfos fieldInfos; - private final String baseDataFileName; - // Maps field name to field entries - private final Map fieldEntryMap = new HashMap<>(1); - private final Directory directory; - private final SegmentReadState state; - - public JVectorReader(SegmentReadState state) throws IOException { - this.state = state; - this.fieldInfos = state.fieldInfos; - this.baseDataFileName = state.segmentInfo.name + "_" + state.segmentSuffix; - final String metaFileName = IndexFileNames.segmentFileName( - state.segmentInfo.name, - state.segmentSuffix, - JVectorFormat.META_EXTENSION - ); - this.directory = state.directory; - boolean success = false; - try (ChecksumIndexInput meta = state.directory.openChecksumInput(metaFileName)) { - CodecUtil.checkIndexHeader( - meta, - JVectorFormat.META_CODEC_NAME, - JVectorFormat.VERSION_START, - JVectorFormat.VERSION_CURRENT, - state.segmentInfo.getId(), - state.segmentSuffix - ); - readFields(meta); - CodecUtil.checkFooter(meta); - - success = true; - } finally { - if (!success) { - IOUtils.closeWhileHandlingException(this); - } - } + private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = + VectorizationProvider.getInstance().getVectorTypeSupport(); + + private final FieldInfos fieldInfos; + private final String baseDataFileName; + // Maps field name to field entries + private final Map fieldEntryMap = new HashMap<>(1); + private final Directory directory; + private final SegmentReadState state; + + public JVectorReader(SegmentReadState state) throws IOException { + this.state = state; + this.fieldInfos = state.fieldInfos; + this.baseDataFileName = state.segmentInfo.name + "_" + state.segmentSuffix; + final String metaFileName = + IndexFileNames.segmentFileName( + state.segmentInfo.name, state.segmentSuffix, JVectorFormat.META_EXTENSION); + this.directory = state.directory; + boolean success = false; + try (ChecksumIndexInput meta = state.directory.openChecksumInput(metaFileName)) { + CodecUtil.checkIndexHeader( + meta, + JVectorFormat.META_CODEC_NAME, + JVectorFormat.VERSION_START, + JVectorFormat.VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix); + readFields(meta); + CodecUtil.checkFooter(meta); + + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(this); + } } - - @Override - public void checkIntegrity() throws IOException { - for (FieldEntry fieldEntry : fieldEntryMap.values()) { - // Verify the vector index file - try (var indexInput = state.directory.openInput(fieldEntry.vectorIndexFieldDataFileName, IOContext.READONCE)) { - CodecUtil.checksumEntireFile(indexInput); - } - - // Verify the neighbors score cache file - try (var indexInput = state.directory.openInput(fieldEntry.neighborsScoreCacheIndexFieldFileName, IOContext.READONCE)) { - CodecUtil.checksumEntireFile(indexInput); - } - } + } + + @Override + public void checkIntegrity() throws IOException { + for (FieldEntry fieldEntry : fieldEntryMap.values()) { + // Verify the vector index file + try (var indexInput = + state.directory.openInput(fieldEntry.vectorIndexFieldDataFileName, IOContext.READONCE)) { + CodecUtil.checksumEntireFile(indexInput); + } + + // Verify the neighbors score cache file + try (var indexInput = + state.directory.openInput( + fieldEntry.neighborsScoreCacheIndexFieldFileName, IOContext.READONCE)) { + CodecUtil.checksumEntireFile(indexInput); + } } - - 
@Override - public FloatVectorValues getFloatVectorValues(String field) throws IOException { - final FieldEntry fieldEntry = fieldEntryMap.get(field); - return new JVectorFloatVectorValues(fieldEntry.index, fieldEntry.similarityFunction, fieldEntry.graphNodeIdToDocMap); + } + + @Override + public FloatVectorValues getFloatVectorValues(String field) throws IOException { + final FieldEntry fieldEntry = fieldEntryMap.get(field); + return new JVectorFloatVectorValues( + fieldEntry.index, fieldEntry.similarityFunction, fieldEntry.graphNodeIdToDocMap); + } + + @Override + public ByteVectorValues getByteVectorValues(String field) throws IOException { + /** Byte vector values are not supported in jVector library. Instead use PQ. */ + return null; + } + + public Optional getProductQuantizationForField(String field) + throws IOException { + final FieldEntry fieldEntry = fieldEntryMap.get(field); + if (fieldEntry.pqVectors == null) { + return Optional.empty(); } - @Override - public ByteVectorValues getByteVectorValues(String field) throws IOException { - /** - * Byte vector values are not supported in jVector library. Instead use PQ. - */ - return null; + return Optional.of(fieldEntry.pqVectors.getCompressor()); + } + + public RandomAccessReader getNeighborsScoreCacheForField(String field) throws IOException { + final FieldEntry fieldEntry = fieldEntryMap.get(field); + return fieldEntry.neighborsScoreCacheIndexReaderSupplier.get(); + } + + public OnDiskGraphIndex getOnDiskGraphIndex(String field) throws IOException { + return fieldEntryMap.get(field).index; + } + + @Override + public void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) + throws IOException { + final OnDiskGraphIndex index = fieldEntryMap.get(field).index; + final JVectorKnnCollector jvectorKnnCollector; + if (knnCollector instanceof JVectorKnnCollector) { + jvectorKnnCollector = (JVectorKnnCollector) knnCollector; + } else { + log.warn( + "KnnCollector must be of type JVectorKnnCollector, for now we will re-wrap it but this is not ideal"); + jvectorKnnCollector = + new JVectorKnnCollector( + knnCollector, + KNNConstants.DEFAULT_QUERY_SIMILARITY_THRESHOLD.floatValue(), + KNNConstants.DEFAULT_QUERY_RERANK_FLOOR.floatValue(), + KNNConstants.DEFAULT_OVER_QUERY_FACTOR, + KNNConstants.DEFAULT_QUERY_USE_PRUNING); } - public Optional getProductQuantizationForField(String field) throws IOException { - final FieldEntry fieldEntry = fieldEntryMap.get(field); - if (fieldEntry.pqVectors == null) { - return Optional.empty(); + // search for a random vector using a GraphSearcher and SearchScoreProvider + VectorFloat q = VECTOR_TYPE_SUPPORT.createFloatVector(target); + final SearchScoreProvider ssp; + + try (var view = index.getView()) { + final long graphSearchStart = System.currentTimeMillis(); + if (fieldEntryMap.get(field).pqVectors + != null) { // Quantized, use the precomputed score function + final PQVectors pqVectors = fieldEntryMap.get(field).pqVectors; + // SearchScoreProvider that does a first pass with the loaded-in-memory PQVectors, + // then reranks with the exact vectors that are stored on disk in the index + ScoreFunction.ApproximateScoreFunction asf = + pqVectors.precomputedScoreFunctionFor(q, fieldEntryMap.get(field).similarityFunction); + ScoreFunction.ExactScoreFunction reranker = + view.rerankerFor(q, fieldEntryMap.get(field).similarityFunction); + ssp = new DefaultSearchScoreProvider(asf, reranker); + } else { // Not quantized, used typical searcher + ssp = + DefaultSearchScoreProvider.exact(q, 
fieldEntryMap.get(field).similarityFunction, view); + } + final GraphNodeIdToDocMap jvectorLuceneDocMap = fieldEntryMap.get(field).graphNodeIdToDocMap; + // Convert the acceptDocs bitmap from Lucene to jVector ordinal bitmap filter + // Logic works as follows: if acceptDocs is null, we accept all ordinals. Otherwise, we check + // if the jVector ordinal has a + // corresponding Lucene doc ID accepted by acceptDocs filter. + io.github.jbellis.jvector.util.Bits compatibleBits = + ord -> acceptDocs == null || acceptDocs.get(jvectorLuceneDocMap.getLuceneDocId(ord)); + + try (var graphSearcher = new GraphSearcher(index)) { + final var searchResults = + graphSearcher.search( + ssp, + jvectorKnnCollector.k(), + jvectorKnnCollector.k() * jvectorKnnCollector.getOverQueryFactor(), + jvectorKnnCollector.getThreshold(), + jvectorKnnCollector.getRerankFloor(), + compatibleBits); + for (SearchResult.NodeScore ns : searchResults.getNodes()) { + jvectorKnnCollector.collect(jvectorLuceneDocMap.getLuceneDocId(ns.node), ns.score); } - - return Optional.of(fieldEntry.pqVectors.getCompressor()); + final long graphSearchEnd = System.currentTimeMillis(); + final long searchTime = graphSearchEnd - graphSearchStart; + log.debug("Search (including acquiring view) took {} ms", searchTime); + + // Collect the below metrics about the search and somehow wire this back to {@link + // @KNNStats} + final int visitedNodesCount = searchResults.getVisitedCount(); + final int rerankedCount = searchResults.getRerankedCount(); + + final int expandedCount = searchResults.getExpandedCount(); + final int expandedBaseLayerCount = searchResults.getExpandedCountBaseLayer(); + + KNNCounter.KNN_QUERY_VISITED_NODES.add(visitedNodesCount); + KNNCounter.KNN_QUERY_RERANKED_COUNT.add(rerankedCount); + KNNCounter.KNN_QUERY_EXPANDED_NODES.add(expandedCount); + KNNCounter.KNN_QUERY_EXPANDED_BASE_LAYER_NODES.add(expandedBaseLayerCount); + KNNCounter.KNN_QUERY_GRAPH_SEARCH_TIME.add(searchTime); + log.debug( + "rerankedCount: {}, visitedNodesCount: {}, expandedCount: {}, expandedBaseLayerCount: {}", + rerankedCount, + visitedNodesCount, + expandedCount, + expandedBaseLayerCount); + } } - - public RandomAccessReader getNeighborsScoreCacheForField(String field) throws IOException { - final FieldEntry fieldEntry = fieldEntryMap.get(field); - return fieldEntry.neighborsScoreCacheIndexReaderSupplier.get(); + } + + @Override + public void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) + throws IOException { + // TODO: implement this + throw new UnsupportedOperationException("Byte vector search is not supported yet with jVector"); + } + + @Override + public void close() throws IOException { + for (FieldEntry fieldEntry : fieldEntryMap.values()) { + IOUtils.close(fieldEntry); } - - public OnDiskGraphIndex getOnDiskGraphIndex(String field) throws IOException { - return fieldEntryMap.get(field).index; + fieldEntryMap.clear(); + } + + private void readFields(ChecksumIndexInput meta) throws IOException { + for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) { + final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber); // read field number + JVectorWriter.VectorIndexFieldMetadata vectorIndexFieldMetadata = + new JVectorWriter.VectorIndexFieldMetadata(meta); + assert fieldInfo.number == vectorIndexFieldMetadata.getFieldNumber(); + fieldEntryMap.put(fieldInfo.name, new FieldEntry(fieldInfo, vectorIndexFieldMetadata)); } - - @Override - public void search(String field, float[] target, 
KnnCollector knnCollector, Bits acceptDocs) throws IOException { - final OnDiskGraphIndex index = fieldEntryMap.get(field).index; - final JVectorKnnCollector jvectorKnnCollector; - if (knnCollector instanceof JVectorKnnCollector) { - jvectorKnnCollector = (JVectorKnnCollector) knnCollector; - } else { - log.warn("KnnCollector must be of type JVectorKnnCollector, for now we will re-wrap it but this is not ideal"); - jvectorKnnCollector = new JVectorKnnCollector( - knnCollector, - KNNConstants.DEFAULT_QUERY_SIMILARITY_THRESHOLD.floatValue(), - KNNConstants.DEFAULT_QUERY_RERANK_FLOOR.floatValue(), - KNNConstants.DEFAULT_OVER_QUERY_FACTOR, - KNNConstants.DEFAULT_QUERY_USE_PRUNING - ); - + } + + class FieldEntry implements Closeable { + private final FieldInfo fieldInfo; + private final VectorEncoding vectorEncoding; + private final VectorSimilarityFunction similarityFunction; + private final int dimension; + private final long vectorIndexOffset; + private final long vectorIndexLength; + private final long pqCodebooksAndVectorsLength; + private final long pqCodebooksAndVectorsOffset; + private final String vectorIndexFieldDataFileName; + private final String neighborsScoreCacheIndexFieldFileName; + private final GraphNodeIdToDocMap graphNodeIdToDocMap; + private final ReaderSupplier indexReaderSupplier; + private final ReaderSupplier pqCodebooksReaderSupplier; + private final ReaderSupplier neighborsScoreCacheIndexReaderSupplier; + private final OnDiskGraphIndex index; + private final PQVectors pqVectors; // The product quantized vectors with their codebooks + + public FieldEntry( + FieldInfo fieldInfo, JVectorWriter.VectorIndexFieldMetadata vectorIndexFieldMetadata) + throws IOException { + this.fieldInfo = fieldInfo; + this.similarityFunction = + VectorSimilarityMapper.ordToDistFunc( + vectorIndexFieldMetadata.getVectorSimilarityFunction().ordinal()); + this.vectorEncoding = vectorIndexFieldMetadata.getVectorEncoding(); + this.vectorIndexOffset = vectorIndexFieldMetadata.getVectorIndexOffset(); + this.vectorIndexLength = vectorIndexFieldMetadata.getVectorIndexLength(); + this.pqCodebooksAndVectorsLength = vectorIndexFieldMetadata.getPqCodebooksAndVectorsLength(); + this.pqCodebooksAndVectorsOffset = vectorIndexFieldMetadata.getPqCodebooksAndVectorsOffset(); + this.dimension = vectorIndexFieldMetadata.getVectorDimension(); + this.graphNodeIdToDocMap = vectorIndexFieldMetadata.getGraphNodeIdToDocMap(); + + this.vectorIndexFieldDataFileName = + baseDataFileName + "_" + fieldInfo.name + "." + JVectorFormat.VECTOR_INDEX_EXTENSION; + this.neighborsScoreCacheIndexFieldFileName = + baseDataFileName + + "_" + + fieldInfo.name + + "." 
+ + JVectorFormat.NEIGHBORS_SCORE_CACHE_EXTENSION; + + // For the slice we would like to include the Lucene header, unfortunately, we have to do this + // because jVector use global + // offsets instead of local offsets + final long sliceLength = + vectorIndexLength + + CodecUtil.indexHeaderLength( + JVectorFormat.VECTOR_INDEX_CODEC_NAME, state.segmentSuffix); + // Load the graph index + this.indexReaderSupplier = + new JVectorRandomAccessReader.Supplier( + directory.openInput(vectorIndexFieldDataFileName, state.context), 0, sliceLength); + this.index = OnDiskGraphIndex.load(indexReaderSupplier, vectorIndexOffset); + + // If quantized load the compressed product quantized vectors with their codebooks + if (pqCodebooksAndVectorsLength > 0) { + assert pqCodebooksAndVectorsOffset > 0; + if (pqCodebooksAndVectorsOffset < vectorIndexOffset) { + throw new IllegalArgumentException( + "pqCodebooksAndVectorsOffset must be greater than vectorIndexOffset"); } - - // search for a random vector using a GraphSearcher and SearchScoreProvider - VectorFloat q = VECTOR_TYPE_SUPPORT.createFloatVector(target); - final SearchScoreProvider ssp; - - try (var view = index.getView()) { - final long graphSearchStart = System.currentTimeMillis(); - if (fieldEntryMap.get(field).pqVectors != null) { // Quantized, use the precomputed score function - final PQVectors pqVectors = fieldEntryMap.get(field).pqVectors; - // SearchScoreProvider that does a first pass with the loaded-in-memory PQVectors, - // then reranks with the exact vectors that are stored on disk in the index - ScoreFunction.ApproximateScoreFunction asf = pqVectors.precomputedScoreFunctionFor( - q, - fieldEntryMap.get(field).similarityFunction - ); - ScoreFunction.ExactScoreFunction reranker = view.rerankerFor(q, fieldEntryMap.get(field).similarityFunction); - ssp = new DefaultSearchScoreProvider(asf, reranker); - } else { // Not quantized, used typical searcher - ssp = DefaultSearchScoreProvider.exact(q, fieldEntryMap.get(field).similarityFunction, view); - } - final GraphNodeIdToDocMap jvectorLuceneDocMap = fieldEntryMap.get(field).graphNodeIdToDocMap; - // Convert the acceptDocs bitmap from Lucene to jVector ordinal bitmap filter - // Logic works as follows: if acceptDocs is null, we accept all ordinals. Otherwise, we check if the jVector ordinal has a - // corresponding Lucene doc ID accepted by acceptDocs filter. 
- io.github.jbellis.jvector.util.Bits compatibleBits = ord -> acceptDocs == null - || acceptDocs.get(jvectorLuceneDocMap.getLuceneDocId(ord)); - - try (var graphSearcher = new GraphSearcher(index)) { - final var searchResults = graphSearcher.search( - ssp, - jvectorKnnCollector.k(), - jvectorKnnCollector.k() * jvectorKnnCollector.getOverQueryFactor(), - jvectorKnnCollector.getThreshold(), - jvectorKnnCollector.getRerankFloor(), - compatibleBits - ); - for (SearchResult.NodeScore ns : searchResults.getNodes()) { - jvectorKnnCollector.collect(jvectorLuceneDocMap.getLuceneDocId(ns.node), ns.score); - } - final long graphSearchEnd = System.currentTimeMillis(); - final long searchTime = graphSearchEnd - graphSearchStart; - log.debug("Search (including acquiring view) took {} ms", searchTime); - - // Collect the below metrics about the search and somehow wire this back to {@link @KNNStats} - final int visitedNodesCount = searchResults.getVisitedCount(); - final int rerankedCount = searchResults.getRerankedCount(); - - final int expandedCount = searchResults.getExpandedCount(); - final int expandedBaseLayerCount = searchResults.getExpandedCountBaseLayer(); - - KNNCounter.KNN_QUERY_VISITED_NODES.add(visitedNodesCount); - KNNCounter.KNN_QUERY_RERANKED_COUNT.add(rerankedCount); - KNNCounter.KNN_QUERY_EXPANDED_NODES.add(expandedCount); - KNNCounter.KNN_QUERY_EXPANDED_BASE_LAYER_NODES.add(expandedBaseLayerCount); - KNNCounter.KNN_QUERY_GRAPH_SEARCH_TIME.add(searchTime); - log.debug( - "rerankedCount: {}, visitedNodesCount: {}, expandedCount: {}, expandedBaseLayerCount: {}", - rerankedCount, - visitedNodesCount, - expandedCount, - expandedBaseLayerCount - ); - - } + this.pqCodebooksReaderSupplier = + new JVectorRandomAccessReader.Supplier( + directory.openInput(vectorIndexFieldDataFileName, IOContext.READONCE), + pqCodebooksAndVectorsOffset, + pqCodebooksAndVectorsLength); + log.debug( + "Loading PQ codebooks and vectors for field {}, with numbers of vectors: {}", + fieldInfo.name, + state.segmentInfo.maxDoc()); + try (final var randomAccessReader = pqCodebooksReaderSupplier.get()) { + this.pqVectors = PQVectors.load(randomAccessReader); } - } + } else { + this.pqCodebooksReaderSupplier = null; + this.pqVectors = null; + } - @Override - public void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) throws IOException { - // TODO: implement this - throw new UnsupportedOperationException("Byte vector search is not supported yet with jVector"); + final IndexInput indexInput = + directory.openInput(neighborsScoreCacheIndexFieldFileName, state.context); + CodecUtil.readIndexHeader(indexInput); + + this.neighborsScoreCacheIndexReaderSupplier = + new JVectorRandomAccessReader.Supplier(indexInput); } @Override public void close() throws IOException { - for (FieldEntry fieldEntry : fieldEntryMap.values()) { - IOUtils.close(fieldEntry); - } - fieldEntryMap.clear(); - } - - private void readFields(ChecksumIndexInput meta) throws IOException { - for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) { - final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber); // read field number - JVectorWriter.VectorIndexFieldMetadata vectorIndexFieldMetadata = new JVectorWriter.VectorIndexFieldMetadata(meta); - assert fieldInfo.number == vectorIndexFieldMetadata.getFieldNumber(); - fieldEntryMap.put(fieldInfo.name, new FieldEntry(fieldInfo, vectorIndexFieldMetadata)); - } - } - - class FieldEntry implements Closeable { - private final FieldInfo fieldInfo; - 
private final VectorEncoding vectorEncoding; - private final VectorSimilarityFunction similarityFunction; - private final int dimension; - private final long vectorIndexOffset; - private final long vectorIndexLength; - private final long pqCodebooksAndVectorsLength; - private final long pqCodebooksAndVectorsOffset; - private final String vectorIndexFieldDataFileName; - private final String neighborsScoreCacheIndexFieldFileName; - private final GraphNodeIdToDocMap graphNodeIdToDocMap; - private final ReaderSupplier indexReaderSupplier; - private final ReaderSupplier pqCodebooksReaderSupplier; - private final ReaderSupplier neighborsScoreCacheIndexReaderSupplier; - private final OnDiskGraphIndex index; - private final PQVectors pqVectors; // The product quantized vectors with their codebooks - - public FieldEntry(FieldInfo fieldInfo, JVectorWriter.VectorIndexFieldMetadata vectorIndexFieldMetadata) throws IOException { - this.fieldInfo = fieldInfo; - this.similarityFunction = VectorSimilarityMapper.ordToDistFunc( - vectorIndexFieldMetadata.getVectorSimilarityFunction().ordinal() - ); - this.vectorEncoding = vectorIndexFieldMetadata.getVectorEncoding(); - this.vectorIndexOffset = vectorIndexFieldMetadata.getVectorIndexOffset(); - this.vectorIndexLength = vectorIndexFieldMetadata.getVectorIndexLength(); - this.pqCodebooksAndVectorsLength = vectorIndexFieldMetadata.getPqCodebooksAndVectorsLength(); - this.pqCodebooksAndVectorsOffset = vectorIndexFieldMetadata.getPqCodebooksAndVectorsOffset(); - this.dimension = vectorIndexFieldMetadata.getVectorDimension(); - this.graphNodeIdToDocMap = vectorIndexFieldMetadata.getGraphNodeIdToDocMap(); - - this.vectorIndexFieldDataFileName = baseDataFileName + "_" + fieldInfo.name + "." + JVectorFormat.VECTOR_INDEX_EXTENSION; - this.neighborsScoreCacheIndexFieldFileName = baseDataFileName - + "_" - + fieldInfo.name - + "." 
- + JVectorFormat.NEIGHBORS_SCORE_CACHE_EXTENSION; - - // For the slice we would like to include the Lucene header, unfortunately, we have to do this because jVector use global - // offsets instead of local offsets - final long sliceLength = vectorIndexLength + CodecUtil.indexHeaderLength( - JVectorFormat.VECTOR_INDEX_CODEC_NAME, - state.segmentSuffix - ); - // Load the graph index - this.indexReaderSupplier = new JVectorRandomAccessReader.Supplier( - directory.openInput(vectorIndexFieldDataFileName, state.context), - 0, - sliceLength - ); - this.index = OnDiskGraphIndex.load(indexReaderSupplier, vectorIndexOffset); - - // If quantized load the compressed product quantized vectors with their codebooks - if (pqCodebooksAndVectorsLength > 0) { - assert pqCodebooksAndVectorsOffset > 0; - if (pqCodebooksAndVectorsOffset < vectorIndexOffset) { - throw new IllegalArgumentException("pqCodebooksAndVectorsOffset must be greater than vectorIndexOffset"); - } - this.pqCodebooksReaderSupplier = new JVectorRandomAccessReader.Supplier( - directory.openInput(vectorIndexFieldDataFileName, IOContext.READONCE), - pqCodebooksAndVectorsOffset, - pqCodebooksAndVectorsLength - ); - log.debug( - "Loading PQ codebooks and vectors for field {}, with numbers of vectors: {}", - fieldInfo.name, - state.segmentInfo.maxDoc() - ); - try (final var randomAccessReader = pqCodebooksReaderSupplier.get()) { - this.pqVectors = PQVectors.load(randomAccessReader); - } - } else { - this.pqCodebooksReaderSupplier = null; - this.pqVectors = null; - } - - final IndexInput indexInput = directory.openInput(neighborsScoreCacheIndexFieldFileName, state.context); - CodecUtil.readIndexHeader(indexInput); - - this.neighborsScoreCacheIndexReaderSupplier = new JVectorRandomAccessReader.Supplier(indexInput); - } - - @Override - public void close() throws IOException { - if (indexReaderSupplier != null) { - IOUtils.close(indexReaderSupplier::close); - } - if (pqCodebooksReaderSupplier != null) { - IOUtils.close(pqCodebooksReaderSupplier::close); - } - if (neighborsScoreCacheIndexReaderSupplier != null) { - IOUtils.close(neighborsScoreCacheIndexReaderSupplier::close); - } - } + if (indexReaderSupplier != null) { + IOUtils.close(indexReaderSupplier::close); + } + if (pqCodebooksReaderSupplier != null) { + IOUtils.close(pqCodebooksReaderSupplier::close); + } + if (neighborsScoreCacheIndexReaderSupplier != null) { + IOUtils.close(neighborsScoreCacheIndexReaderSupplier::close); + } } + } + /** Utility class to map between Lucene and jVector similarity functions and metadata ordinals. */ + public static class VectorSimilarityMapper { /** - * Utility class to map between Lucene and jVector similarity functions and metadata ordinals. + * List of vector similarity functions supported by jVector library The similarity functions orders + * matter in this list because it is later used to resolve the similarity function by ordinal. */ - public static class VectorSimilarityMapper { - /** - List of vector similarity functions supported by jVector library - The similarity functions orders matter in this list because it is later used to resolve the similarity function by ordinal. 
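A small sanity sketch, not part of the diff: the mapper resolves similarity functions by their position in the list defined below (EUCLIDEAN = 0, DOT_PRODUCT = 1, COSINE = 2), so the conversion round-trips as follows.

// Round trip between Lucene and jVector similarity functions via the list ordinal.
int ord = JVectorReader.VectorSimilarityMapper.distFuncToOrd(
    org.apache.lucene.index.VectorSimilarityFunction.COSINE);        // -> 2
io.github.jbellis.jvector.vector.VectorSimilarityFunction jvectorFunc =
    JVectorReader.VectorSimilarityMapper.ordToDistFunc(ord);         // -> COSINE
org.apache.lucene.index.VectorSimilarityFunction luceneFunc =
    JVectorReader.VectorSimilarityMapper.ordToLuceneDistFunc(ord);   // -> COSINE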
- */ - public static final List JVECTOR_SUPPORTED_SIMILARITY_FUNCTIONS = List.of( - VectorSimilarityFunction.EUCLIDEAN, - VectorSimilarityFunction.DOT_PRODUCT, - VectorSimilarityFunction.COSINE - ); - - public static final Map LUCENE_TO_JVECTOR_MAP = Map.of( - org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN, + public static final List JVECTOR_SUPPORTED_SIMILARITY_FUNCTIONS = + List.of( VectorSimilarityFunction.EUCLIDEAN, - org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT, VectorSimilarityFunction.DOT_PRODUCT, - org.apache.lucene.index.VectorSimilarityFunction.COSINE, - VectorSimilarityFunction.COSINE - ); - - public static int distFuncToOrd(org.apache.lucene.index.VectorSimilarityFunction func) { - if (LUCENE_TO_JVECTOR_MAP.containsKey(func)) { - return JVECTOR_SUPPORTED_SIMILARITY_FUNCTIONS.indexOf(LUCENE_TO_JVECTOR_MAP.get(func)); - } - - throw new IllegalArgumentException("invalid distance function: " + func); - } + VectorSimilarityFunction.COSINE); + + public static final Map< + org.apache.lucene.index.VectorSimilarityFunction, VectorSimilarityFunction> + LUCENE_TO_JVECTOR_MAP = + Map.of( + org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN, + VectorSimilarityFunction.EUCLIDEAN, + org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT, + VectorSimilarityFunction.DOT_PRODUCT, + org.apache.lucene.index.VectorSimilarityFunction.COSINE, + VectorSimilarityFunction.COSINE); + + public static int distFuncToOrd(org.apache.lucene.index.VectorSimilarityFunction func) { + if (LUCENE_TO_JVECTOR_MAP.containsKey(func)) { + return JVECTOR_SUPPORTED_SIMILARITY_FUNCTIONS.indexOf(LUCENE_TO_JVECTOR_MAP.get(func)); + } + + throw new IllegalArgumentException("invalid distance function: " + func); + } - public static VectorSimilarityFunction ordToDistFunc(int ord) { - return JVECTOR_SUPPORTED_SIMILARITY_FUNCTIONS.get(ord); - } + public static VectorSimilarityFunction ordToDistFunc(int ord) { + return JVECTOR_SUPPORTED_SIMILARITY_FUNCTIONS.get(ord); + } - public static org.apache.lucene.index.VectorSimilarityFunction ordToLuceneDistFunc(int ord) { - if (ord < 0 || ord >= JVECTOR_SUPPORTED_SIMILARITY_FUNCTIONS.size()) { - throw new IllegalArgumentException("Invalid ordinal: " + ord); - } - VectorSimilarityFunction jvectorFunc = JVECTOR_SUPPORTED_SIMILARITY_FUNCTIONS.get(ord); - for (Map.Entry entry : LUCENE_TO_JVECTOR_MAP - .entrySet()) { - if (entry.getValue().equals(jvectorFunc)) { - return entry.getKey(); - } - } - throw new IllegalStateException("No matching Lucene VectorSimilarityFunction found for ordinal: " + ord); + public static org.apache.lucene.index.VectorSimilarityFunction ordToLuceneDistFunc(int ord) { + if (ord < 0 || ord >= JVECTOR_SUPPORTED_SIMILARITY_FUNCTIONS.size()) { + throw new IllegalArgumentException("Invalid ordinal: " + ord); + } + VectorSimilarityFunction jvectorFunc = JVECTOR_SUPPORTED_SIMILARITY_FUNCTIONS.get(ord); + for (Map.Entry + entry : LUCENE_TO_JVECTOR_MAP.entrySet()) { + if (entry.getValue().equals(jvectorFunc)) { + return entry.getKey(); } + } + throw new IllegalStateException( + "No matching Lucene VectorSimilarityFunction found for ordinal: " + ord); } + } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java index 3e0b042dbe2a..6b7937f51525 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java +++ 
b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java @@ -19,32 +19,35 @@ import io.github.jbellis.jvector.vector.VectorSimilarityFunction; import io.github.jbellis.jvector.vector.types.VectorFloat; +import java.io.IOException; import org.apache.lucene.index.KnnVectorValues; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.VectorScorer; -import java.io.IOException; - public class JVectorVectorScorer implements VectorScorer { - private final JVectorFloatVectorValues floatVectorValues; - private final KnnVectorValues.DocIndexIterator docIndexIterator; - private final VectorFloat target; - private final VectorSimilarityFunction similarityFunction; + private final JVectorFloatVectorValues floatVectorValues; + private final KnnVectorValues.DocIndexIterator docIndexIterator; + private final VectorFloat target; + private final VectorSimilarityFunction similarityFunction; - public JVectorVectorScorer(JVectorFloatVectorValues vectorValues, VectorFloat target, VectorSimilarityFunction similarityFunction) { - this.floatVectorValues = vectorValues; - this.docIndexIterator = floatVectorValues.iterator(); - this.target = target; - this.similarityFunction = similarityFunction; - } + public JVectorVectorScorer( + JVectorFloatVectorValues vectorValues, + VectorFloat target, + VectorSimilarityFunction similarityFunction) { + this.floatVectorValues = vectorValues; + this.docIndexIterator = floatVectorValues.iterator(); + this.target = target; + this.similarityFunction = similarityFunction; + } - @Override - public float score() throws IOException { - return similarityFunction.compare(target, floatVectorValues.vectorFloatValue(docIndexIterator.index())); - } + @Override + public float score() throws IOException { + return similarityFunction.compare( + target, floatVectorValues.vectorFloatValue(docIndexIterator.index())); + } - @Override - public DocIdSetIterator iterator() { - return docIndexIterator; - } + @Override + public DocIdSetIterator iterator() { + return docIndexIterator; + } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 764d4a21a15f..9b17c6165dfd 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -17,6 +17,11 @@ package org.opensearch.knn.index.codec.jvector; +import static io.github.jbellis.jvector.quantization.KMeansPlusPlusClusterer.UNWEIGHTED; +import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding; +import static org.opensearch.knn.index.codec.jvector.JVectorFormat.SIMD_POOL_FLUSH; +import static org.opensearch.knn.index.codec.jvector.JVectorFormat.SIMD_POOL_MERGE; + import io.github.jbellis.jvector.disk.RandomAccessReader; import io.github.jbellis.jvector.graph.*; import io.github.jbellis.jvector.graph.disk.*; @@ -29,6 +34,13 @@ import io.github.jbellis.jvector.vector.VectorizationProvider; import io.github.jbellis.jvector.vector.types.VectorFloat; import io.github.jbellis.jvector.vector.types.VectorTypeSupport; +import java.io.IOException; +import java.io.UnsupportedEncodingException; +import java.time.Clock; +import java.util.*; +import java.util.concurrent.ForkJoinPool; +import java.util.function.Function; +import java.util.stream.IntStream; import lombok.AllArgsConstructor; import lombok.Builder; import 
lombok.Getter; @@ -47,1063 +59,1133 @@ import org.apache.lucene.util.RamUsageEstimator; import org.opensearch.knn.plugin.stats.KNNCounter; -import java.io.IOException; -import java.io.UnsupportedEncodingException; -import java.time.Clock; -import java.util.*; -import java.util.concurrent.ForkJoinPool; -import java.util.function.Function; -import java.util.stream.IntStream; - -import static io.github.jbellis.jvector.quantization.KMeansPlusPlusClusterer.UNWEIGHTED; -import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding; -import static org.opensearch.knn.index.codec.jvector.JVectorFormat.SIMD_POOL_FLUSH; -import static org.opensearch.knn.index.codec.jvector.JVectorFormat.SIMD_POOL_MERGE; - /** - * JVectorWriter is responsible for writing vector data into index segments using the JVector library. + * JVectorWriter is responsible for writing vector data into index segments using the JVector + * library. * *
 * Persisting the JVector Graph Index
 *
- * Flushing data into disk segments occurs in two scenarios:
- *   <li>When the segment is being flushed to disk (e.g., when a new segment is created) via {@link #flush(int, Sorter.DocMap)}
- *   <li>When the segment is a result of a merge (e.g., when multiple segments are merged into one) via {@link #mergeOneField(FieldInfo, MergeState)}
+ * <p>Flushing data into disk segments occurs in two scenarios:
+ *
+ *   <li>When the segment is being flushed to disk (e.g., when a new segment is created) via {@link
+ *       #flush(int, Sorter.DocMap)}
+ *   <li>When the segment is a result of a merge (e.g., when multiple segments are merged into one)
+ *       via {@link #mergeOneField(FieldInfo, MergeState)}
 *
 * jVector Graph Ordinal to Lucene Document ID Mapping
 *
- * JVector keeps its own ordinals to identify its nodes. Those ordinals can be different from the Lucene document IDs.
- * Document IDs in Lucene can change after a merge operation. Therefore, we need to maintain a mapping between
- * JVector ordinals and Lucene document IDs that can hold across merges.
- *
- * Document IDs in Lucene are mapped across merges and sorts using the {@link org.apache.lucene.index.MergeState.DocMap} for merges and {@link org.apache.lucene.index.Sorter.DocMap} for flush/sorts.
- * For jVector however, we don't want to modify the ordinals in the jVector graph, and therefore we need to maintain a mapping between the jVector ordinals and the new Lucene document IDs.
- * This is achieved by keeping checkpoints of the {@link GraphNodeIdToDocMap} class in the index metadata and allowing us to update the mapping as needed across merges by constructing a new mapping from the previous mapping and the {@link MergeState.DocMap} provided in the {@link MergeState}.
- * And across sorts with {@link GraphNodeIdToDocMap#update(Sorter.DocMap)} during flushes.
+ *
+ * <p>JVector keeps its own ordinals to identify its nodes. Those ordinals can be different from the
+ * Lucene document IDs. Document IDs in Lucene can change after a merge operation. Therefore, we
+ * need to maintain a mapping between JVector ordinals and Lucene document IDs that can hold across
+ * merges.
+ *
+ * <p>
Document IDs in Lucene are mapped across merges and sorts using the {@link + * org.apache.lucene.index.MergeState.DocMap} for merges and {@link + * org.apache.lucene.index.Sorter.DocMap} for flush/sorts. For jVector however, we don't want to + * modify the ordinals in the jVector graph, and therefore we need to maintain a mapping between the + * jVector ordinals and the new Lucene document IDs. This is achieved by keeping checkpoints of the + * {@link GraphNodeIdToDocMap} class in the index metadata and allowing us to update the mapping as + * needed across merges by constructing a new mapping from the previous mapping and the {@link + * MergeState.DocMap} provided in the {@link MergeState}. And across sorts with {@link + * GraphNodeIdToDocMap#update(Sorter.DocMap)} during flushes. */ @Log4j2 public class JVectorWriter extends KnnVectorsWriter { - private static final long SHALLOW_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(JVectorWriter.class); - - private final List> fields = new ArrayList<>(); - - private final IndexOutput meta; - private final IndexOutput vectorIndex; - private final String indexDataFileName; - private final String baseDataFileName; - private final SegmentWriteState segmentWriteState; - private final int maxConn; - private final int beamWidth; - private final float degreeOverflow; - private final float alpha; - private final Function numberOfSubspacesPerVectorSupplier; // Number of subspaces used per vector for PQ quantization - // as a function of the original dimension - private final int minimumBatchSizeForQuantization; // Threshold for the vector count above which we will trigger PQ quantization - private final boolean hierarchyEnabled; - - private boolean finished = false; - - public JVectorWriter( - SegmentWriteState segmentWriteState, - int maxConn, - int beamWidth, - float degreeOverflow, - float alpha, - Function numberOfSubspacesPerVectorSupplier, - int minimumBatchSizeForQuantization, - boolean hierarchyEnabled - ) throws IOException { - this.segmentWriteState = segmentWriteState; - this.maxConn = maxConn; - this.beamWidth = beamWidth; - this.degreeOverflow = degreeOverflow; - this.alpha = alpha; - this.numberOfSubspacesPerVectorSupplier = numberOfSubspacesPerVectorSupplier; - this.minimumBatchSizeForQuantization = minimumBatchSizeForQuantization; - this.hierarchyEnabled = hierarchyEnabled; - String metaFileName = IndexFileNames.segmentFileName( + private static final long SHALLOW_RAM_BYTES_USED = + RamUsageEstimator.shallowSizeOfInstance(JVectorWriter.class); + + private final List> fields = new ArrayList<>(); + + private final IndexOutput meta; + private final IndexOutput vectorIndex; + private final String indexDataFileName; + private final String baseDataFileName; + private final SegmentWriteState segmentWriteState; + private final int maxConn; + private final int beamWidth; + private final float degreeOverflow; + private final float alpha; + private final Function + numberOfSubspacesPerVectorSupplier; // Number of subspaces used per vector for PQ quantization + // as a function of the original dimension + private final int + minimumBatchSizeForQuantization; // Threshold for the vector count above which we will trigger + // PQ quantization + private final boolean hierarchyEnabled; + + private boolean finished = false; + + public JVectorWriter( + SegmentWriteState segmentWriteState, + int maxConn, + int beamWidth, + float degreeOverflow, + float alpha, + Function numberOfSubspacesPerVectorSupplier, + int minimumBatchSizeForQuantization, + 
boolean hierarchyEnabled) + throws IOException { + this.segmentWriteState = segmentWriteState; + this.maxConn = maxConn; + this.beamWidth = beamWidth; + this.degreeOverflow = degreeOverflow; + this.alpha = alpha; + this.numberOfSubspacesPerVectorSupplier = numberOfSubspacesPerVectorSupplier; + this.minimumBatchSizeForQuantization = minimumBatchSizeForQuantization; + this.hierarchyEnabled = hierarchyEnabled; + String metaFileName = + IndexFileNames.segmentFileName( segmentWriteState.segmentInfo.name, segmentWriteState.segmentSuffix, - JVectorFormat.META_EXTENSION - ); + JVectorFormat.META_EXTENSION); - this.indexDataFileName = IndexFileNames.segmentFileName( + this.indexDataFileName = + IndexFileNames.segmentFileName( segmentWriteState.segmentInfo.name, segmentWriteState.segmentSuffix, - JVectorFormat.VECTOR_INDEX_EXTENSION - ); - this.baseDataFileName = segmentWriteState.segmentInfo.name + "_" + segmentWriteState.segmentSuffix; - - boolean success = false; - try { - meta = segmentWriteState.directory.createOutput(metaFileName, segmentWriteState.context); - vectorIndex = segmentWriteState.directory.createOutput(indexDataFileName, segmentWriteState.context); - CodecUtil.writeIndexHeader( - meta, - JVectorFormat.META_CODEC_NAME, - JVectorFormat.VERSION_CURRENT, - segmentWriteState.segmentInfo.getId(), - segmentWriteState.segmentSuffix - ); - - CodecUtil.writeIndexHeader( - vectorIndex, - JVectorFormat.VECTOR_INDEX_CODEC_NAME, - JVectorFormat.VERSION_CURRENT, - segmentWriteState.segmentInfo.getId(), - segmentWriteState.segmentSuffix - ); - - success = true; - } finally { - if (!success) { - IOUtils.closeWhileHandlingException(this); - } - } + JVectorFormat.VECTOR_INDEX_EXTENSION); + this.baseDataFileName = + segmentWriteState.segmentInfo.name + "_" + segmentWriteState.segmentSuffix; + + boolean success = false; + try { + meta = segmentWriteState.directory.createOutput(metaFileName, segmentWriteState.context); + vectorIndex = + segmentWriteState.directory.createOutput(indexDataFileName, segmentWriteState.context); + CodecUtil.writeIndexHeader( + meta, + JVectorFormat.META_CODEC_NAME, + JVectorFormat.VERSION_CURRENT, + segmentWriteState.segmentInfo.getId(), + segmentWriteState.segmentSuffix); + + CodecUtil.writeIndexHeader( + vectorIndex, + JVectorFormat.VECTOR_INDEX_CODEC_NAME, + JVectorFormat.VERSION_CURRENT, + segmentWriteState.segmentInfo.getId(), + segmentWriteState.segmentSuffix); + + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(this); + } } - - @Override - public KnnFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOException { - log.info("Adding field {} in segment {}", fieldInfo.name, segmentWriteState.segmentInfo.name); - if (fieldInfo.getVectorEncoding() == VectorEncoding.BYTE) { - final String errorMessage = "byte[] vectors are not supported in JVector. " - + "Instead you should only use float vectors and leverage product quantization during indexing." 
- + "This can provides much greater savings in storage and memory"; - log.error(errorMessage); - throw new UnsupportedOperationException(errorMessage); - } - FieldWriter newField = new FieldWriter<>(fieldInfo, segmentWriteState.segmentInfo.name); - - fields.add(newField); - return newField; + } + + @Override + public KnnFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOException { + log.info("Adding field {} in segment {}", fieldInfo.name, segmentWriteState.segmentInfo.name); + if (fieldInfo.getVectorEncoding() == VectorEncoding.BYTE) { + final String errorMessage = + "byte[] vectors are not supported in JVector. " + + "Instead you should only use float vectors and leverage product quantization during indexing." + + "This can provides much greater savings in storage and memory"; + log.error(errorMessage); + throw new UnsupportedOperationException(errorMessage); } - - @Override - public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { - log.info("Merging field {} into segment {}", fieldInfo.name, segmentWriteState.segmentInfo.name); - try { - final long mergeStart = Clock.systemDefaultZone().millis(); - switch (fieldInfo.getVectorEncoding()) { - case BYTE: - throw new UnsupportedEncodingException("Byte vectors are not supported in JVector."); - case FLOAT32: - final var mergeRavv = new RandomAccessMergedFloatVectorValues(fieldInfo, mergeState); - mergeRavv.merge(); - break; - } - final long mergeEnd = Clock.systemDefaultZone().millis(); - final long mergeTime = mergeEnd - mergeStart; - KNNCounter.KNN_GRAPH_MERGE_TIME.add(mergeTime); - log.info("Completed Merge field {} into segment {}", fieldInfo.name, segmentWriteState.segmentInfo.name); - } catch (Exception e) { - log.error("Error merging field {} into segment {}", fieldInfo.name, segmentWriteState.segmentInfo.name, e); - throw e; - } + FieldWriter newField = new FieldWriter<>(fieldInfo, segmentWriteState.segmentInfo.name); + + fields.add(newField); + return newField; + } + + @Override + public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { + log.info( + "Merging field {} into segment {}", fieldInfo.name, segmentWriteState.segmentInfo.name); + try { + final long mergeStart = Clock.systemDefaultZone().millis(); + switch (fieldInfo.getVectorEncoding()) { + case BYTE: + throw new UnsupportedEncodingException("Byte vectors are not supported in JVector."); + case FLOAT32: + final var mergeRavv = new RandomAccessMergedFloatVectorValues(fieldInfo, mergeState); + mergeRavv.merge(); + break; + } + final long mergeEnd = Clock.systemDefaultZone().millis(); + final long mergeTime = mergeEnd - mergeStart; + KNNCounter.KNN_GRAPH_MERGE_TIME.add(mergeTime); + log.info( + "Completed Merge field {} into segment {}", + fieldInfo.name, + segmentWriteState.segmentInfo.name); + } catch (Exception e) { + log.error( + "Error merging field {} into segment {}", + fieldInfo.name, + segmentWriteState.segmentInfo.name, + e); + throw e; } - - @Override - public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { - log.info("Flushing {} fields", fields.size()); - - log.info("Flushing jVector graph index"); - for (FieldWriter field : fields) { - final RandomAccessVectorValues randomAccessVectorValues = field.randomAccessVectorValues; - final int[] newToOldOrds = new int[randomAccessVectorValues.size()]; - for (int ord = 0; ord < randomAccessVectorValues.size(); ord++) { - newToOldOrds[ord] = ord; - } - final BuildScoreProvider buildScoreProvider; - final PQVectors pqVectors; - 
final FieldInfo fieldInfo = field.fieldInfo; - if (randomAccessVectorValues.size() >= minimumBatchSizeForQuantization) { - log.info("Calculating codebooks and compressed vectors for field {}", fieldInfo.name); - pqVectors = getPQVectors(newToOldOrds, randomAccessVectorValues, fieldInfo); - buildScoreProvider = BuildScoreProvider.pqBuildScoreProvider(getVectorSimilarityFunction(fieldInfo), pqVectors); - } else { - log.info( - "Vector count: {}, less than limit to trigger PQ quantization: {}, for field {}, will use full precision vectors instead.", - randomAccessVectorValues.size(), - minimumBatchSizeForQuantization, - fieldInfo.name - ); - pqVectors = null; - buildScoreProvider = BuildScoreProvider.randomAccessScoreProvider( - randomAccessVectorValues, - getVectorSimilarityFunction(fieldInfo) - ); - } - - // Generate the ord to doc mapping - final int[] ordinalsToDocIds = new int[randomAccessVectorValues.size()]; - for (int ord = 0; ord < randomAccessVectorValues.size(); ord++) { - ordinalsToDocIds[ord] = field.docIds.get(ord); - } - final GraphNodeIdToDocMap graphNodeIdToDocMap = new GraphNodeIdToDocMap(ordinalsToDocIds); - if (sortMap != null) { - graphNodeIdToDocMap.update(sortMap); - } - - OnHeapGraphIndex graph = getGraph( - buildScoreProvider, - randomAccessVectorValues, - newToOldOrds, - fieldInfo, - segmentWriteState.segmentInfo.name, - SIMD_POOL_FLUSH - ); - writeField(field.fieldInfo, field.randomAccessVectorValues, pqVectors, newToOldOrds, graphNodeIdToDocMap, graph); - - } - } - - private void writeField( - FieldInfo fieldInfo, - RandomAccessVectorValues randomAccessVectorValues, - PQVectors pqVectors, - int[] newToOldOrds, - GraphNodeIdToDocMap graphNodeIdToDocMap, - OnHeapGraphIndex graph - ) throws IOException { + } + + @Override + public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { + log.info("Flushing {} fields", fields.size()); + + log.info("Flushing jVector graph index"); + for (FieldWriter field : fields) { + final RandomAccessVectorValues randomAccessVectorValues = field.randomAccessVectorValues; + final int[] newToOldOrds = new int[randomAccessVectorValues.size()]; + for (int ord = 0; ord < randomAccessVectorValues.size(); ord++) { + newToOldOrds[ord] = ord; + } + final BuildScoreProvider buildScoreProvider; + final PQVectors pqVectors; + final FieldInfo fieldInfo = field.fieldInfo; + if (randomAccessVectorValues.size() >= minimumBatchSizeForQuantization) { + log.info("Calculating codebooks and compressed vectors for field {}", fieldInfo.name); + pqVectors = getPQVectors(newToOldOrds, randomAccessVectorValues, fieldInfo); + buildScoreProvider = + BuildScoreProvider.pqBuildScoreProvider( + getVectorSimilarityFunction(fieldInfo), pqVectors); + } else { log.info( - "Writing field {} with vector count: {}, for segment: {}", - fieldInfo.name, + "Vector count: {}, less than limit to trigger PQ quantization: {}, for field {}, will use full precision vectors instead.", randomAccessVectorValues.size(), - segmentWriteState.segmentInfo.name - ); - final var vectorIndexFieldMetadata = writeGraph( + minimumBatchSizeForQuantization, + fieldInfo.name); + pqVectors = null; + buildScoreProvider = + BuildScoreProvider.randomAccessScoreProvider( + randomAccessVectorValues, getVectorSimilarityFunction(fieldInfo)); + } + + // Generate the ord to doc mapping + final int[] ordinalsToDocIds = new int[randomAccessVectorValues.size()]; + for (int ord = 0; ord < randomAccessVectorValues.size(); ord++) { + ordinalsToDocIds[ord] = field.docIds.get(ord); + } + final 
GraphNodeIdToDocMap graphNodeIdToDocMap = new GraphNodeIdToDocMap(ordinalsToDocIds); + if (sortMap != null) { + graphNodeIdToDocMap.update(sortMap); + } + + OnHeapGraphIndex graph = + getGraph( + buildScoreProvider, + randomAccessVectorValues, + newToOldOrds, + fieldInfo, + segmentWriteState.segmentInfo.name, + SIMD_POOL_FLUSH); + writeField( + field.fieldInfo, + field.randomAccessVectorValues, + pqVectors, + newToOldOrds, + graphNodeIdToDocMap, + graph); + } + } + + private void writeField( + FieldInfo fieldInfo, + RandomAccessVectorValues randomAccessVectorValues, + PQVectors pqVectors, + int[] newToOldOrds, + GraphNodeIdToDocMap graphNodeIdToDocMap, + OnHeapGraphIndex graph) + throws IOException { + log.info( + "Writing field {} with vector count: {}, for segment: {}", + fieldInfo.name, + randomAccessVectorValues.size(), + segmentWriteState.segmentInfo.name); + final var vectorIndexFieldMetadata = + writeGraph( graph, randomAccessVectorValues, fieldInfo, pqVectors, newToOldOrds, - graphNodeIdToDocMap - ); - meta.writeInt(fieldInfo.number); - vectorIndexFieldMetadata.toOutput(meta); - - log.info("Writing neighbors score cache for field {}", fieldInfo.name); - // field data file, which contains the graph - final String neighborsScoreCacheIndexFieldFileName = baseDataFileName + graphNodeIdToDocMap); + meta.writeInt(fieldInfo.number); + vectorIndexFieldMetadata.toOutput(meta); + + log.info("Writing neighbors score cache for field {}", fieldInfo.name); + // field data file, which contains the graph + final String neighborsScoreCacheIndexFieldFileName = + baseDataFileName + "_" + fieldInfo.name + "." + JVectorFormat.NEIGHBORS_SCORE_CACHE_EXTENSION; - try ( - IndexOutput indexOutput = segmentWriteState.directory.createOutput( - neighborsScoreCacheIndexFieldFileName, - segmentWriteState.context - ); - final var jVectorIndexWriter = new JVectorIndexWriter(indexOutput) - ) { - CodecUtil.writeIndexHeader( - indexOutput, - JVectorFormat.NEIGHBORS_SCORE_CACHE_CODEC_NAME, - JVectorFormat.VERSION_CURRENT, - segmentWriteState.segmentInfo.getId(), - segmentWriteState.segmentSuffix - ); - graph.save(jVectorIndexWriter); - CodecUtil.writeFooter(indexOutput); - } + try (IndexOutput indexOutput = + segmentWriteState.directory.createOutput( + neighborsScoreCacheIndexFieldFileName, segmentWriteState.context); + final var jVectorIndexWriter = new JVectorIndexWriter(indexOutput)) { + CodecUtil.writeIndexHeader( + indexOutput, + JVectorFormat.NEIGHBORS_SCORE_CACHE_CODEC_NAME, + JVectorFormat.VERSION_CURRENT, + segmentWriteState.segmentInfo.getId(), + segmentWriteState.segmentSuffix); + graph.save(jVectorIndexWriter); + CodecUtil.writeFooter(indexOutput); } - - /** - * Writes the graph and PQ codebooks and compressed vectors to the vector index file - * @param graph graph - * @param randomAccessVectorValues random access vector values - * @param fieldInfo field info - * @return Tuple of start offset and length of the graph - * @throws IOException IOException - */ - private VectorIndexFieldMetadata writeGraph( - OnHeapGraphIndex graph, - RandomAccessVectorValues randomAccessVectorValues, - FieldInfo fieldInfo, - PQVectors pqVectors, - int[] newToOldOrds, - GraphNodeIdToDocMap graphNodeIdToDocMap - ) throws IOException { - // field data file, which contains the graph - final String vectorIndexFieldFileName = baseDataFileName + "_" + fieldInfo.name + "." 
+ JVectorFormat.VECTOR_INDEX_EXTENSION; - - try ( - IndexOutput indexOutput = segmentWriteState.directory.createOutput(vectorIndexFieldFileName, segmentWriteState.context); - final var jVectorIndexWriter = new JVectorIndexWriter(indexOutput) - ) { - // Header for the field data file - CodecUtil.writeIndexHeader( - indexOutput, - JVectorFormat.VECTOR_INDEX_CODEC_NAME, - JVectorFormat.VERSION_CURRENT, - segmentWriteState.segmentInfo.getId(), - segmentWriteState.segmentSuffix - ); - final long startOffset = indexOutput.getFilePointer(); - - log.info("Writing graph to {}", vectorIndexFieldFileName); - var resultBuilder = VectorIndexFieldMetadata.builder() - .fieldNumber(fieldInfo.number) - .vectorEncoding(fieldInfo.getVectorEncoding()) - .vectorSimilarityFunction(fieldInfo.getVectorSimilarityFunction()) - .vectorDimension(randomAccessVectorValues.dimension()) - .graphNodeIdToDocMap(graphNodeIdToDocMap); - - try ( - var writer = new OnDiskSequentialGraphIndexWriter.Builder(graph, jVectorIndexWriter).with( - new InlineVectors(randomAccessVectorValues.dimension()) - ).build() - ) { - var suppliers = Feature.singleStateFactory( - FeatureId.INLINE_VECTORS, - nodeId -> new InlineVectors.State(randomAccessVectorValues.getVector(newToOldOrds[nodeId])) - ); - writer.write(suppliers); - long endGraphOffset = jVectorIndexWriter.position(); - resultBuilder.vectorIndexOffset(startOffset); - resultBuilder.vectorIndexLength(endGraphOffset - startOffset); - - // If PQ is enabled and we have enough vectors, write the PQ codebooks and compressed vectors - if (pqVectors != null) { - log.info( - "Writing PQ codebooks and vectors for field {} since the size is {} >= {}", - fieldInfo.name, - randomAccessVectorValues.size(), - minimumBatchSizeForQuantization - ); - resultBuilder.pqCodebooksAndVectorsOffset(endGraphOffset); - // write the compressed vectors and codebooks to disk - pqVectors.write(jVectorIndexWriter); - resultBuilder.pqCodebooksAndVectorsLength(jVectorIndexWriter.position() - endGraphOffset); - } else { - resultBuilder.pqCodebooksAndVectorsOffset(0); - resultBuilder.pqCodebooksAndVectorsLength(0); - } - CodecUtil.writeFooter(indexOutput); - } - - return resultBuilder.build(); + } + + /** + * Writes the graph and PQ codebooks and compressed vectors to the vector index file + * + * @param graph graph + * @param randomAccessVectorValues random access vector values + * @param fieldInfo field info + * @return Tuple of start offset and length of the graph + * @throws IOException IOException + */ + private VectorIndexFieldMetadata writeGraph( + OnHeapGraphIndex graph, + RandomAccessVectorValues randomAccessVectorValues, + FieldInfo fieldInfo, + PQVectors pqVectors, + int[] newToOldOrds, + GraphNodeIdToDocMap graphNodeIdToDocMap) + throws IOException { + // field data file, which contains the graph + final String vectorIndexFieldFileName = + baseDataFileName + "_" + fieldInfo.name + "." 
+ JVectorFormat.VECTOR_INDEX_EXTENSION; + + try (IndexOutput indexOutput = + segmentWriteState.directory.createOutput( + vectorIndexFieldFileName, segmentWriteState.context); + final var jVectorIndexWriter = new JVectorIndexWriter(indexOutput)) { + // Header for the field data file + CodecUtil.writeIndexHeader( + indexOutput, + JVectorFormat.VECTOR_INDEX_CODEC_NAME, + JVectorFormat.VERSION_CURRENT, + segmentWriteState.segmentInfo.getId(), + segmentWriteState.segmentSuffix); + final long startOffset = indexOutput.getFilePointer(); + + log.info("Writing graph to {}", vectorIndexFieldFileName); + var resultBuilder = + VectorIndexFieldMetadata.builder() + .fieldNumber(fieldInfo.number) + .vectorEncoding(fieldInfo.getVectorEncoding()) + .vectorSimilarityFunction(fieldInfo.getVectorSimilarityFunction()) + .vectorDimension(randomAccessVectorValues.dimension()) + .graphNodeIdToDocMap(graphNodeIdToDocMap); + + try (var writer = + new OnDiskSequentialGraphIndexWriter.Builder(graph, jVectorIndexWriter) + .with(new InlineVectors(randomAccessVectorValues.dimension())) + .build()) { + var suppliers = + Feature.singleStateFactory( + FeatureId.INLINE_VECTORS, + nodeId -> + new InlineVectors.State( + randomAccessVectorValues.getVector(newToOldOrds[nodeId]))); + writer.write(suppliers); + long endGraphOffset = jVectorIndexWriter.position(); + resultBuilder.vectorIndexOffset(startOffset); + resultBuilder.vectorIndexLength(endGraphOffset - startOffset); + + // If PQ is enabled and we have enough vectors, write the PQ codebooks and compressed + // vectors + if (pqVectors != null) { + log.info( + "Writing PQ codebooks and vectors for field {} since the size is {} >= {}", + fieldInfo.name, + randomAccessVectorValues.size(), + minimumBatchSizeForQuantization); + resultBuilder.pqCodebooksAndVectorsOffset(endGraphOffset); + // write the compressed vectors and codebooks to disk + pqVectors.write(jVectorIndexWriter); + resultBuilder.pqCodebooksAndVectorsLength(jVectorIndexWriter.position() - endGraphOffset); + } else { + resultBuilder.pqCodebooksAndVectorsOffset(0); + resultBuilder.pqCodebooksAndVectorsLength(0); } - } + CodecUtil.writeFooter(indexOutput); + } - private PQVectors getPQVectors(int[] newToOldOrds, RandomAccessVectorValues randomAccessVectorValues, FieldInfo fieldInfo) - throws IOException { - final String fieldName = fieldInfo.name; - final VectorSimilarityFunction vectorSimilarityFunction = fieldInfo.getVectorSimilarityFunction(); - log.info("Computing PQ codebooks for field {} for {} vectors", fieldName, randomAccessVectorValues.size()); - final long start = Clock.systemDefaultZone().millis(); - final var M = numberOfSubspacesPerVectorSupplier.apply(randomAccessVectorValues.dimension()); - final var numberOfClustersPerSubspace = Math.min(256, randomAccessVectorValues.size()); // number of centroids per - // subspace - ProductQuantization pq = ProductQuantization.compute( + return resultBuilder.build(); + } + } + + private PQVectors getPQVectors( + int[] newToOldOrds, RandomAccessVectorValues randomAccessVectorValues, FieldInfo fieldInfo) + throws IOException { + final String fieldName = fieldInfo.name; + final VectorSimilarityFunction vectorSimilarityFunction = + fieldInfo.getVectorSimilarityFunction(); + log.info( + "Computing PQ codebooks for field {} for {} vectors", + fieldName, + randomAccessVectorValues.size()); + final long start = Clock.systemDefaultZone().millis(); + final var M = numberOfSubspacesPerVectorSupplier.apply(randomAccessVectorValues.dimension()); + final var 
numberOfClustersPerSubspace = + Math.min(256, randomAccessVectorValues.size()); // number of centroids per + // subspace + ProductQuantization pq = + ProductQuantization.compute( randomAccessVectorValues, M, // number of subspaces numberOfClustersPerSubspace, // number of centroids per subspace vectorSimilarityFunction == VectorSimilarityFunction.EUCLIDEAN, // center the dataset UNWEIGHTED, SIMD_POOL_MERGE, - ForkJoinPool.commonPool() - ); + ForkJoinPool.commonPool()); + + final long end = Clock.systemDefaultZone().millis(); + final long trainingTime = end - start; + log.info("Computed PQ codebooks for field {}, in {} millis", fieldName, trainingTime); + KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.add(trainingTime); + log.info( + "Encoding and building PQ vectors for field {} for {} vectors", + fieldName, + randomAccessVectorValues.size()); + // PQVectors pqVectors = pq.encodeAll(randomAccessVectorValues, SIMD_POOL); + PQVectors pqVectors = + PQVectors.encodeAndBuild( + pq, newToOldOrds.length, newToOldOrds, randomAccessVectorValues, SIMD_POOL_MERGE); + log.info( + "Encoded and built PQ vectors for field {}, original size: {} bytes, compressed size: {} bytes", + fieldName, + pqVectors.getOriginalSize(), + pqVectors.getCompressedSize()); + return pqVectors; + } + + @Value + @Builder(toBuilder = true) + @AllArgsConstructor + public static class VectorIndexFieldMetadata { + int fieldNumber; + VectorEncoding vectorEncoding; + VectorSimilarityFunction vectorSimilarityFunction; + int vectorDimension; + long vectorIndexOffset; + long vectorIndexLength; + long pqCodebooksAndVectorsOffset; + long pqCodebooksAndVectorsLength; + float degreeOverflow; // important when leveraging cache + GraphNodeIdToDocMap graphNodeIdToDocMap; + + public void toOutput(IndexOutput out) throws IOException { + out.writeInt(fieldNumber); + out.writeInt(vectorEncoding.ordinal()); + out.writeInt(JVectorReader.VectorSimilarityMapper.distFuncToOrd(vectorSimilarityFunction)); + out.writeVInt(vectorDimension); + out.writeVLong(vectorIndexOffset); + out.writeVLong(vectorIndexLength); + out.writeVLong(pqCodebooksAndVectorsOffset); + out.writeVLong(pqCodebooksAndVectorsLength); + out.writeInt(Float.floatToIntBits(degreeOverflow)); + graphNodeIdToDocMap.toOutput(out); + } - final long end = Clock.systemDefaultZone().millis(); - final long trainingTime = end - start; - log.info("Computed PQ codebooks for field {}, in {} millis", fieldName, trainingTime); - KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.add(trainingTime); - log.info("Encoding and building PQ vectors for field {} for {} vectors", fieldName, randomAccessVectorValues.size()); - // PQVectors pqVectors = pq.encodeAll(randomAccessVectorValues, SIMD_POOL); - PQVectors pqVectors = PQVectors.encodeAndBuild(pq, newToOldOrds.length, newToOldOrds, randomAccessVectorValues, SIMD_POOL_MERGE); - log.info( - "Encoded and built PQ vectors for field {}, original size: {} bytes, compressed size: {} bytes", - fieldName, - pqVectors.getOriginalSize(), - pqVectors.getCompressedSize() - ); - return pqVectors; + public VectorIndexFieldMetadata(IndexInput in) throws IOException { + this.fieldNumber = in.readInt(); + this.vectorEncoding = readVectorEncoding(in); + this.vectorSimilarityFunction = + JVectorReader.VectorSimilarityMapper.ordToLuceneDistFunc(in.readInt()); + this.vectorDimension = in.readVInt(); + this.vectorIndexOffset = in.readVLong(); + this.vectorIndexLength = in.readVLong(); + this.pqCodebooksAndVectorsOffset = in.readVLong(); + this.pqCodebooksAndVectorsLength = in.readVLong(); + 
this.degreeOverflow = Float.intBitsToFloat(in.readInt()); + this.graphNodeIdToDocMap = new GraphNodeIdToDocMap(in); } + } - @Value - @Builder(toBuilder = true) - @AllArgsConstructor - public static class VectorIndexFieldMetadata { - int fieldNumber; - VectorEncoding vectorEncoding; - VectorSimilarityFunction vectorSimilarityFunction; - int vectorDimension; - long vectorIndexOffset; - long vectorIndexLength; - long pqCodebooksAndVectorsOffset; - long pqCodebooksAndVectorsLength; - float degreeOverflow; // important when leveraging cache - GraphNodeIdToDocMap graphNodeIdToDocMap; - - public void toOutput(IndexOutput out) throws IOException { - out.writeInt(fieldNumber); - out.writeInt(vectorEncoding.ordinal()); - out.writeInt(JVectorReader.VectorSimilarityMapper.distFuncToOrd(vectorSimilarityFunction)); - out.writeVInt(vectorDimension); - out.writeVLong(vectorIndexOffset); - out.writeVLong(vectorIndexLength); - out.writeVLong(pqCodebooksAndVectorsOffset); - out.writeVLong(pqCodebooksAndVectorsLength); - out.writeInt(Float.floatToIntBits(degreeOverflow)); - graphNodeIdToDocMap.toOutput(out); - } + @Override + public void finish() throws IOException { + log.info("Finishing segment {}", segmentWriteState.segmentInfo.name); + if (finished) { + throw new IllegalStateException("already finished"); + } + finished = true; - public VectorIndexFieldMetadata(IndexInput in) throws IOException { - this.fieldNumber = in.readInt(); - this.vectorEncoding = readVectorEncoding(in); - this.vectorSimilarityFunction = JVectorReader.VectorSimilarityMapper.ordToLuceneDistFunc(in.readInt()); - this.vectorDimension = in.readVInt(); - this.vectorIndexOffset = in.readVLong(); - this.vectorIndexLength = in.readVLong(); - this.pqCodebooksAndVectorsOffset = in.readVLong(); - this.pqCodebooksAndVectorsLength = in.readVLong(); - this.degreeOverflow = Float.intBitsToFloat(in.readInt()); - this.graphNodeIdToDocMap = new GraphNodeIdToDocMap(in); - } + if (meta != null) { + // write end of fields marker + meta.writeInt(-1); + CodecUtil.writeFooter(meta); + } + if (vectorIndex != null) { + CodecUtil.writeFooter(vectorIndex); + } + } + + @Override + public void close() throws IOException { + IOUtils.close(meta, vectorIndex); + } + + @Override + public long ramBytesUsed() { + long total = SHALLOW_RAM_BYTES_USED; + for (FieldWriter field : fields) { + // the field tracks the delegate field usage + total += field.ramBytesUsed(); + } + return total; + } + + /** + * The FieldWriter class is responsible for writing vector field data into index segments. It + * provides functionality to process vector values as those being added, manage memory usage, and + * build HNSW graph indexing structures for efficient retrieval during search queries. + * + * @param The type of vector value to be handled by the writer. This is often specialized to + * support specific implementations, such as float[] or byte[] vectors. 
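To make the FieldWriter contract described in the javadoc above concrete, here is a hedged usage sketch (not part of the patch); it assumes package-level access to the nested class, an existing FieldInfo for a 4-dimensional float field, and a hypothetical segment name.

    // Each accepted float[] is buffered as a jVector VectorFloat and paired with its doc ID;
    // byte[] values and a second value for the same document are rejected.
    JVectorWriter.FieldWriter<float[]> fieldWriter =
        new JVectorWriter.FieldWriter<>(fieldInfo, "segment_0");
    fieldWriter.addValue(0, new float[] {0.1f, 0.2f, 0.3f, 0.4f});
    fieldWriter.addValue(5, new float[] {0.5f, 0.6f, 0.7f, 0.8f}); // doc IDs need not be contiguous
    // fieldWriter.addValue(5, ...) again would throw IllegalArgumentException (one value per doc),
    // and any byte[] value would throw UnsupportedOperationException.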
+ */ + static class FieldWriter extends KnnFieldVectorsWriter { + private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = + VectorizationProvider.getInstance().getVectorTypeSupport(); + private final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(FieldWriter.class); + @Getter private final FieldInfo fieldInfo; + private int lastDocID = -1; + private final String segmentName; + private final RandomAccessVectorValues randomAccessVectorValues; + // The ordering of docIds matches the ordering of vectors, the index in this list corresponds to + // the jVector ordinal + private final List> vectors = new ArrayList<>(); + private final List docIds = new ArrayList<>(); + + FieldWriter(FieldInfo fieldInfo, String segmentName) { + /** For creating a new field from a flat field vectors writer. */ + this.randomAccessVectorValues = + new ListRandomAccessVectorValues(vectors, fieldInfo.getVectorDimension()); + this.fieldInfo = fieldInfo; + this.segmentName = segmentName; } @Override - public void finish() throws IOException { - log.info("Finishing segment {}", segmentWriteState.segmentInfo.name); - if (finished) { - throw new IllegalStateException("already finished"); - } - finished = true; - - if (meta != null) { - // write end of fields marker - meta.writeInt(-1); - CodecUtil.writeFooter(meta); - } - - if (vectorIndex != null) { - CodecUtil.writeFooter(vectorIndex); - } + public void addValue(int docID, T vectorValue) throws IOException { + log.trace( + "Adding value {} to field {} in segment {}", vectorValue, fieldInfo.name, segmentName); + if (docID == lastDocID) { + throw new IllegalArgumentException( + "VectorValuesField \"" + + fieldInfo.name + + "\" appears more than once in this document (only one value is allowed per field)"); + } + docIds.add(docID); + if (vectorValue instanceof float[]) { + vectors.add(VECTOR_TYPE_SUPPORT.createFloatVector(vectorValue)); + } else if (vectorValue instanceof byte[]) { + final String errorMessage = + "byte[] vectors are not supported in JVector. " + + "Instead you should only use float vectors and leverage product quantization during indexing." 
+ + "This can provides much greater savings in storage and memory"; + log.error("{}", errorMessage); + throw new UnsupportedOperationException(errorMessage); + } else { + throw new IllegalArgumentException("Unsupported vector type: " + vectorValue.getClass()); + } + lastDocID = docID; } @Override - public void close() throws IOException { - IOUtils.close(meta, vectorIndex); + public T copyValue(T vectorValue) { + throw new UnsupportedOperationException("copyValue not supported"); } @Override public long ramBytesUsed() { - long total = SHALLOW_RAM_BYTES_USED; - for (FieldWriter field : fields) { - // the field tracks the delegate field usage - total += field.ramBytesUsed(); - } - return total; + return SHALLOW_SIZE + (long) vectors.size() * fieldInfo.getVectorDimension() * Float.BYTES; } + } + + static io.github.jbellis.jvector.vector.VectorSimilarityFunction getVectorSimilarityFunction( + FieldInfo fieldInfo) { + log.info( + "Matching vector similarity function {} for field {}", + fieldInfo.getVectorSimilarityFunction(), + fieldInfo.name); + return switch (fieldInfo.getVectorSimilarityFunction()) { + case EUCLIDEAN -> io.github.jbellis.jvector.vector.VectorSimilarityFunction.EUCLIDEAN; + case COSINE -> io.github.jbellis.jvector.vector.VectorSimilarityFunction.COSINE; + case DOT_PRODUCT -> io.github.jbellis.jvector.vector.VectorSimilarityFunction.DOT_PRODUCT; + default -> + throw new IllegalArgumentException( + "Unsupported similarity function: " + fieldInfo.getVectorSimilarityFunction()); + }; + } + + /** + * Implementation of RandomAccessVectorValues that directly uses the source FloatVectorValues from + * multiple segments without copying the vectors. + * + *
+ * <p>Some details about the implementation logic:
+ *
+ * <p>First, we identify the leading reader, which is the one with the most live vectors. Second,
+ * we build a mapping between the ravv ordinals and the reader index and the ordinal in that
+ * reader. Third, we build a mapping between the ravv ordinals and the global doc ids.
+ *
+ * <p>
Very important to note that for the leading graph the node Ids need to correspond to their + * original ravv ordinals in the reader. This is because we are later going to expand that graph + * with new vectors from the other readers. While the new vectors can be assigned arbitrary node + * Ids, the leading graph needs to preserve its original node Ids and map them to the original + * ravv vector ordinals. + */ + class RandomAccessMergedFloatVectorValues implements RandomAccessVectorValues { + private static final int READER_ID = 0; + private static final int READER_ORD = 1; + private static final int LEADING_READER_IDX = 0; + + private final VectorTypeSupport VECTOR_TYPE_SUPPORT = + VectorizationProvider.getInstance().getVectorTypeSupport(); + + // Array of sub-readers + private final KnnVectorsReader[] readers; + private final JVectorFloatVectorValues[] perReaderFloatVectorValues; + + // Maps the ravv ordinals to the reader index and the ordinal in that reader. This is allowing + // us to get a unified view of all the + // vectors in all the readers with a single unified ordinal space. + private final int[][] ravvOrdToReaderMapping; + + // Total number of vectors + private final int size; + // Total number of documents including those without values + private final int totalDocsCount; + + // Vector dimension + private final int dimension; + private final FieldInfo fieldInfo; + private final MergeState mergeState; + private final GraphNodeIdToDocMap graphNodeIdToDocMap; + private final int[] graphNodeIdsToRavvOrds; + private boolean deletesFound = false; /** - * The FieldWriter class is responsible for writing vector field data into index segments. - * It provides functionality to process vector values as those being added, manage memory usage, and build HNSW graph - * indexing structures for efficient retrieval during search queries. + * Creates a random access view over merged float vector values. * - * @param The type of vector value to be handled by the writer. - * This is often specialized to support specific implementations, such as float[] or byte[] vectors. + * @param fieldInfo Field info for the vector field + * @param mergeState Merge state containing readers and doc maps */ - static class FieldWriter extends KnnFieldVectorsWriter { - private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport(); - private final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(FieldWriter.class); - @Getter - private final FieldInfo fieldInfo; - private int lastDocID = -1; - private final String segmentName; - private final RandomAccessVectorValues randomAccessVectorValues; - // The ordering of docIds matches the ordering of vectors, the index in this list corresponds to the jVector ordinal - private final List> vectors = new ArrayList<>(); - private final List docIds = new ArrayList<>(); - - FieldWriter(FieldInfo fieldInfo, String segmentName) { - /** - * For creating a new field from a flat field vectors writer. 
- */ - this.randomAccessVectorValues = new ListRandomAccessVectorValues(vectors, fieldInfo.getVectorDimension()); - this.fieldInfo = fieldInfo; - this.segmentName = segmentName; - } - - @Override - public void addValue(int docID, T vectorValue) throws IOException { - log.trace("Adding value {} to field {} in segment {}", vectorValue, fieldInfo.name, segmentName); - if (docID == lastDocID) { - throw new IllegalArgumentException( - "VectorValuesField \"" - + fieldInfo.name - + "\" appears more than once in this document (only one value is allowed per field)" - ); + public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState mergeState) + throws IOException { + this.totalDocsCount = Math.toIntExact(Arrays.stream(mergeState.maxDocs).asLongStream().sum()); + this.fieldInfo = fieldInfo; + this.mergeState = mergeState; + + final String fieldName = fieldInfo.name; + + // Count total vectors, collect readers and identify leading reader, collect base ordinals to + // later be used to build the mapping + // between global ordinals and global lucene doc ids + int totalVectorsCount = 0; + int totalLiveVectorsCount = 0; + int dimension = 0; + int tempLeadingReaderIdx = -1; + int vectorsCountInLeadingReader = -1; + List allReaders = new ArrayList<>(); + final MergeState.DocMap[] docMaps = mergeState.docMaps.clone(); + final Bits[] liveDocs = mergeState.liveDocs.clone(); + final int[] baseOrds = new int[mergeState.knnVectorsReaders.length]; + final int[] deletedOrds = + new int + [mergeState + .knnVectorsReaders + .length]; // counts the number of deleted documents in each reader + // that previously had a vector + + // Find the leading reader, count the total number of live vectors, and the base ordinals for + // each reader + for (int i = 0; i < mergeState.knnVectorsReaders.length; i++) { + FieldInfos fieldInfos = mergeState.fieldInfos[i]; + baseOrds[i] = totalVectorsCount; + if (MergedVectorValues.hasVectorValues(fieldInfos, fieldName)) { + KnnVectorsReader reader = mergeState.knnVectorsReaders[i]; + if (reader != null) { + FloatVectorValues values = reader.getFloatVectorValues(fieldName); + if (values != null) { + allReaders.add(reader); + int vectorCountInReader = values.size(); + int liveVectorCountInReader = 0; + KnnVectorValues.DocIndexIterator it = values.iterator(); + while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + if (liveDocs[i] == null || liveDocs[i].get(it.docID())) { + liveVectorCountInReader++; + } else { + deletedOrds[i]++; + deletesFound = true; + } + } + if (liveVectorCountInReader >= vectorsCountInLeadingReader) { + vectorsCountInLeadingReader = liveVectorCountInReader; + tempLeadingReaderIdx = i; + } + totalVectorsCount += vectorCountInReader; + totalLiveVectorsCount += liveVectorCountInReader; + dimension = Math.max(dimension, values.dimension()); } - docIds.add(docID); - if (vectorValue instanceof float[]) { - vectors.add(VECTOR_TYPE_SUPPORT.createFloatVector(vectorValue)); - } else if (vectorValue instanceof byte[]) { - final String errorMessage = "byte[] vectors are not supported in JVector. " - + "Instead you should only use float vectors and leverage product quantization during indexing." 
- + "This can provides much greater savings in storage and memory"; - log.error("{}", errorMessage); - throw new UnsupportedOperationException(errorMessage); + } + } + } + + assert (totalVectorsCount <= totalDocsCount) + : "Total number of vectors exceeds the total number of documents"; + assert (totalLiveVectorsCount <= totalVectorsCount) + : "Total number of live vectors exceeds the total number of vectors"; + assert (dimension > 0) : "No vectors found for field " + fieldName; + + this.size = totalVectorsCount; + this.readers = new KnnVectorsReader[allReaders.size()]; + for (int i = 0; i < readers.length; i++) { + readers[i] = allReaders.get(i); + } + + // always swap the leading reader to the first position + // For this part we need to make sure we also swap all the other metadata arrays that are + // indexed by reader index + // Such as readers, docMaps, liveDocs, baseOrds, deletedOrds + if (tempLeadingReaderIdx != 0) { + final KnnVectorsReader temp = readers[LEADING_READER_IDX]; + readers[LEADING_READER_IDX] = readers[tempLeadingReaderIdx]; + readers[tempLeadingReaderIdx] = temp; + // also swap the leading doc map to the first position to match the readers + final MergeState.DocMap tempDocMap = docMaps[LEADING_READER_IDX]; + docMaps[LEADING_READER_IDX] = docMaps[tempLeadingReaderIdx]; + docMaps[tempLeadingReaderIdx] = tempDocMap; + // swap base ords + final int tempBaseOrd = baseOrds[LEADING_READER_IDX]; + baseOrds[LEADING_READER_IDX] = baseOrds[tempLeadingReaderIdx]; + baseOrds[tempLeadingReaderIdx] = tempBaseOrd; + } + + this.perReaderFloatVectorValues = new JVectorFloatVectorValues[readers.length]; + this.dimension = dimension; + + // Build mapping from global ordinal to [readerIndex, readerOrd] + this.ravvOrdToReaderMapping = new int[totalDocsCount][2]; + + int documentsIterated = 0; + + // Will be used to build the new graphNodeIdToDocMap with the new graph node id to docId + // mapping. + // This mapping should not be used to access the vectors at any time during construction, but + // only after the merge is complete + // and the new segment is created and used by searchers. + final int[] graphNodeIdToDocIds = new int[totalLiveVectorsCount]; + this.graphNodeIdsToRavvOrds = new int[totalLiveVectorsCount]; + + int graphNodeId = 0; + if (deletesFound) { + // If there are deletes, we need to build a new graph from scratch and compact the graph + // node ids + // TODO: remove this logic once we support incremental graph building with deletes see + // https://github.com/opensearch-project/opensearch-jvector/issues/171 + for (int readerIdx = 0; readerIdx < readers.length; readerIdx++) { + final JVectorFloatVectorValues values = + (JVectorFloatVectorValues) readers[readerIdx].getFloatVectorValues(fieldName); + perReaderFloatVectorValues[readerIdx] = values; + // For each vector in this reader + KnnVectorValues.DocIndexIterator it = values.iterator(); + + for (int docId = it.nextDoc(); + docId != DocIdSetIterator.NO_MORE_DOCS; + docId = it.nextDoc()) { + if (docMaps[readerIdx].get(docId) == -1) { + log.warn( + "Document {} in reader {} is not mapped to a global ordinal from the merge docMaps. 
Will skip this document for now", + docId, + readerIdx); } else { - throw new IllegalArgumentException("Unsupported vector type: " + vectorValue.getClass()); + // Mapping from ravv ordinals to [readerIndex, readerOrd] + // Map graph node id to ravv ordinal + // Map graph node id to doc id + final int newGlobalDocId = docMaps[readerIdx].get(docId); + final int ravvLocalOrd = it.index(); + final int ravvGlobalOrd = ravvLocalOrd + baseOrds[readerIdx]; + graphNodeIdToDocIds[graphNodeId] = newGlobalDocId; + graphNodeIdsToRavvOrds[graphNodeId] = ravvGlobalOrd; + graphNodeId++; + ravvOrdToReaderMapping[ravvGlobalOrd][READER_ID] = readerIdx; // Reader index + ravvOrdToReaderMapping[ravvGlobalOrd][READER_ORD] = ravvLocalOrd; // Ordinal in reader } - lastDocID = docID; + documentsIterated++; + } } - - @Override - public T copyValue(T vectorValue) { - throw new UnsupportedOperationException("copyValue not supported"); - } - - @Override - public long ramBytesUsed() { - return SHALLOW_SIZE + (long) vectors.size() * fieldInfo.getVectorDimension() * Float.BYTES; + } else { + // If there are no deletes, we can reuse the existing graph and simply remap the ravv + // ordinals to the new global doc ids + // for the leading reader we must preserve the original node Ids and map them to the + // corresponding ravv vectors originally + // used to build the graph + // This is necessary because we are later going to expand that graph with new vectors from + // the other readers. + // The leading reader is ALWAYS the first one in the readers array + final JVectorFloatVectorValues leadingReaderValues = + (JVectorFloatVectorValues) readers[LEADING_READER_IDX].getFloatVectorValues(fieldName); + perReaderFloatVectorValues[LEADING_READER_IDX] = leadingReaderValues; + var leadingReaderIt = leadingReaderValues.iterator(); + for (int docId = leadingReaderIt.nextDoc(); + docId != DocIdSetIterator.NO_MORE_DOCS; + docId = leadingReaderIt.nextDoc()) { + final int newGlobalDocId = docMaps[LEADING_READER_IDX].get(docId); + if (newGlobalDocId == -1) { + log.warn( + "Document {} in reader {} is not mapped to a global ordinal from the merge docMaps. Will skip this document for now", + docId, + LEADING_READER_IDX); + } else { + final int ravvLocalOrd = leadingReaderIt.index(); + final int ravvGlobalOrd = ravvLocalOrd + baseOrds[LEADING_READER_IDX]; + graphNodeIdToDocIds[ravvLocalOrd] = newGlobalDocId; + graphNodeIdsToRavvOrds[ravvLocalOrd] = ravvGlobalOrd; + graphNodeId++; + ravvOrdToReaderMapping[ravvGlobalOrd][READER_ID] = LEADING_READER_IDX; // Reader index + ravvOrdToReaderMapping[ravvGlobalOrd][READER_ORD] = ravvLocalOrd; // Ordinal in reader + } + + documentsIterated++; } - } + // For the remaining readers we map the graph node id to the ravv ordinal in the order they + // appear + for (int readerIdx = 1; readerIdx < readers.length; readerIdx++) { + final JVectorFloatVectorValues values = + (JVectorFloatVectorValues) readers[readerIdx].getFloatVectorValues(fieldName); + perReaderFloatVectorValues[readerIdx] = values; + // For each vector in this reader + KnnVectorValues.DocIndexIterator it = values.iterator(); + + for (int docId = it.nextDoc(); + docId != DocIdSetIterator.NO_MORE_DOCS; + docId = it.nextDoc()) { + if (docMaps[readerIdx].get(docId) == -1) { + log.warn( + "Document {} in reader {} is not mapped to a global ordinal from the merge docMaps. 
Will skip this document for now", + docId, + readerIdx); + } else { + // Mapping from ravv ordinals to [readerIndex, readerOrd] + // Map graph node id to ravv ordinal + // Map graph node id to doc id + final int newGlobalDocId = docMaps[readerIdx].get(docId); + final int ravvLocalOrd = it.index(); + final int ravvGlobalOrd = ravvLocalOrd + baseOrds[readerIdx]; + graphNodeIdToDocIds[graphNodeId] = newGlobalDocId; + graphNodeIdsToRavvOrds[graphNodeId] = ravvGlobalOrd; + graphNodeId++; + ravvOrdToReaderMapping[ravvGlobalOrd][READER_ID] = readerIdx; // Reader index + ravvOrdToReaderMapping[ravvGlobalOrd][READER_ORD] = ravvLocalOrd; // Ordinal in reader + } - static io.github.jbellis.jvector.vector.VectorSimilarityFunction getVectorSimilarityFunction(FieldInfo fieldInfo) { - log.info("Matching vector similarity function {} for field {}", fieldInfo.getVectorSimilarityFunction(), fieldInfo.name); - return switch (fieldInfo.getVectorSimilarityFunction()) { - case EUCLIDEAN -> io.github.jbellis.jvector.vector.VectorSimilarityFunction.EUCLIDEAN; - case COSINE -> io.github.jbellis.jvector.vector.VectorSimilarityFunction.COSINE; - case DOT_PRODUCT -> io.github.jbellis.jvector.vector.VectorSimilarityFunction.DOT_PRODUCT; - default -> throw new IllegalArgumentException("Unsupported similarity function: " + fieldInfo.getVectorSimilarityFunction()); - }; + documentsIterated++; + } + } + } + + if (documentsIterated < totalVectorsCount) { + throw new IllegalStateException( + "More documents were expected than what was found in the readers." + + "Expected at least number of total vectors: " + + totalVectorsCount + + " but found only: " + + documentsIterated + + " documents."); + } + + this.graphNodeIdToDocMap = new GraphNodeIdToDocMap(graphNodeIdToDocIds); + log.debug( + "Created RandomAccessMergedFloatVectorValues with {} total vectors from {} readers", + size, + readers.length); } /** - * Implementation of RandomAccessVectorValues that directly uses the source - * FloatVectorValues from multiple segments without copying the vectors. + * Merges the float vector values from multiple readers into a unified structure. This process + * includes handling product quantization (PQ) for vector compression, generating ord-to-doc + * mappings, and writing the merged index into a new segment file. + * + *
+ * <p>The method determines if pre-existing product quantization codebooks are available from
+ * the leading reader. If available, it refines them using remaining vectors from other readers
+ * in the merge. If no pre-existing codebooks are found and the total vector count meets the
+ * required minimum threshold, new codebooks and compressed vectors are computed. Otherwise, no
+ * PQ compression is applied.
+ *
+ * <p>Also, it generates a mapping of ordinals to document IDs by iterating through the provided
+ * vector data, which is further used to write the field data.
+ *
+ * <p>In the event of no deletes or quantization, the graph construction is done by
+ * incrementally adding vectors from smaller segments into the largest segment. For all other
+ * cases, we build a new graph from scratch from all the vectors.
+ *
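The quantization step referenced above can be restated as a short sketch; it mirrors the getPQVectors helper earlier in this hunk (same variable names, same jVector calls) rather than introducing any new API.

    // Product quantization sizing: M subspaces per vector (a function of the original dimension)
    // and at most 256 centroids per subspace, capped by the number of vectors.
    final int M = numberOfSubspacesPerVectorSupplier.apply(randomAccessVectorValues.dimension());
    final int numberOfClustersPerSubspace = Math.min(256, randomAccessVectorValues.size());
    final ProductQuantization pq =
        ProductQuantization.compute(
            randomAccessVectorValues,
            M,
            numberOfClustersPerSubspace,
            vectorSimilarityFunction == VectorSimilarityFunction.EUCLIDEAN, // center the dataset
            UNWEIGHTED,
            SIMD_POOL_MERGE,
            ForkJoinPool.commonPool());
    // The codebooks and per-vector codes are then materialized once and written next to the graph.
    final PQVectors pqVectors =
        PQVectors.encodeAndBuild(
            pq, newToOldOrds.length, newToOldOrds, randomAccessVectorValues, SIMD_POOL_MERGE);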

TODO: Add support for incremental graph building with quantization see issue * - * Very important to note that for the leading graph the node Ids need to correspond to their original ravv ordinals in the reader. - * This is because we are later going to expand that graph with new vectors from the other readers. - * While the new vectors can be assigned arbitrary node Ids, the leading graph needs to preserve its original node Ids and map them to the original ravv vector ordinals. + * @throws IOException if there is an issue during reading or writing vector data. */ - class RandomAccessMergedFloatVectorValues implements RandomAccessVectorValues { - private static final int READER_ID = 0; - private static final int READER_ORD = 1; - private static final int LEADING_READER_IDX = 0; - - private final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport(); - - // Array of sub-readers - private final KnnVectorsReader[] readers; - private final JVectorFloatVectorValues[] perReaderFloatVectorValues; - - // Maps the ravv ordinals to the reader index and the ordinal in that reader. This is allowing us to get a unified view of all the - // vectors in all the readers with a single unified ordinal space. - private final int[][] ravvOrdToReaderMapping; - - // Total number of vectors - private final int size; - // Total number of documents including those without values - private final int totalDocsCount; - - // Vector dimension - private final int dimension; - private final FieldInfo fieldInfo; - private final MergeState mergeState; - private final GraphNodeIdToDocMap graphNodeIdToDocMap; - private final int[] graphNodeIdsToRavvOrds; - private boolean deletesFound = false; - - /** - * Creates a random access view over merged float vector values. 
- * - * @param fieldInfo Field info for the vector field - * @param mergeState Merge state containing readers and doc maps - */ - public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState mergeState) throws IOException { - this.totalDocsCount = Math.toIntExact(Arrays.stream(mergeState.maxDocs).asLongStream().sum()); - this.fieldInfo = fieldInfo; - this.mergeState = mergeState; - - final String fieldName = fieldInfo.name; - - // Count total vectors, collect readers and identify leading reader, collect base ordinals to later be used to build the mapping - // between global ordinals and global lucene doc ids - int totalVectorsCount = 0; - int totalLiveVectorsCount = 0; - int dimension = 0; - int tempLeadingReaderIdx = -1; - int vectorsCountInLeadingReader = -1; - List allReaders = new ArrayList<>(); - final MergeState.DocMap[] docMaps = mergeState.docMaps.clone(); - final Bits[] liveDocs = mergeState.liveDocs.clone(); - final int[] baseOrds = new int[mergeState.knnVectorsReaders.length]; - final int[] deletedOrds = new int[mergeState.knnVectorsReaders.length]; // counts the number of deleted documents in each reader - // that previously had a vector - - // Find the leading reader, count the total number of live vectors, and the base ordinals for each reader - for (int i = 0; i < mergeState.knnVectorsReaders.length; i++) { - FieldInfos fieldInfos = mergeState.fieldInfos[i]; - baseOrds[i] = totalVectorsCount; - if (MergedVectorValues.hasVectorValues(fieldInfos, fieldName)) { - KnnVectorsReader reader = mergeState.knnVectorsReaders[i]; - if (reader != null) { - FloatVectorValues values = reader.getFloatVectorValues(fieldName); - if (values != null) { - allReaders.add(reader); - int vectorCountInReader = values.size(); - int liveVectorCountInReader = 0; - KnnVectorValues.DocIndexIterator it = values.iterator(); - while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { - if (liveDocs[i] == null || liveDocs[i].get(it.docID())) { - liveVectorCountInReader++; - } else { - deletedOrds[i]++; - deletesFound = true; - } - } - if (liveVectorCountInReader >= vectorsCountInLeadingReader) { - vectorsCountInLeadingReader = liveVectorCountInReader; - tempLeadingReaderIdx = i; - } - totalVectorsCount += vectorCountInReader; - totalLiveVectorsCount += liveVectorCountInReader; - dimension = Math.max(dimension, values.dimension()); - } - } - } - } - - assert (totalVectorsCount <= totalDocsCount) : "Total number of vectors exceeds the total number of documents"; - assert (totalLiveVectorsCount <= totalVectorsCount) : "Total number of live vectors exceeds the total number of vectors"; - assert (dimension > 0) : "No vectors found for field " + fieldName; - - this.size = totalVectorsCount; - this.readers = new KnnVectorsReader[allReaders.size()]; - for (int i = 0; i < readers.length; i++) { - readers[i] = allReaders.get(i); - } - - // always swap the leading reader to the first position - // For this part we need to make sure we also swap all the other metadata arrays that are indexed by reader index - // Such as readers, docMaps, liveDocs, baseOrds, deletedOrds - if (tempLeadingReaderIdx != 0) { - final KnnVectorsReader temp = readers[LEADING_READER_IDX]; - readers[LEADING_READER_IDX] = readers[tempLeadingReaderIdx]; - readers[tempLeadingReaderIdx] = temp; - // also swap the leading doc map to the first position to match the readers - final MergeState.DocMap tempDocMap = docMaps[LEADING_READER_IDX]; - docMaps[LEADING_READER_IDX] = docMaps[tempLeadingReaderIdx]; - docMaps[tempLeadingReaderIdx] = 
tempDocMap; - // swap base ords - final int tempBaseOrd = baseOrds[LEADING_READER_IDX]; - baseOrds[LEADING_READER_IDX] = baseOrds[tempLeadingReaderIdx]; - baseOrds[tempLeadingReaderIdx] = tempBaseOrd; - } - - this.perReaderFloatVectorValues = new JVectorFloatVectorValues[readers.length]; - this.dimension = dimension; - - // Build mapping from global ordinal to [readerIndex, readerOrd] - this.ravvOrdToReaderMapping = new int[totalDocsCount][2]; - - int documentsIterated = 0; - - // Will be used to build the new graphNodeIdToDocMap with the new graph node id to docId mapping. - // This mapping should not be used to access the vectors at any time during construction, but only after the merge is complete - // and the new segment is created and used by searchers. - final int[] graphNodeIdToDocIds = new int[totalLiveVectorsCount]; - this.graphNodeIdsToRavvOrds = new int[totalLiveVectorsCount]; - - int graphNodeId = 0; - if (deletesFound) { - // If there are deletes, we need to build a new graph from scratch and compact the graph node ids - // TODO: remove this logic once we support incremental graph building with deletes see - // https://github.com/opensearch-project/opensearch-jvector/issues/171 - for (int readerIdx = 0; readerIdx < readers.length; readerIdx++) { - final JVectorFloatVectorValues values = (JVectorFloatVectorValues) readers[readerIdx].getFloatVectorValues(fieldName); - perReaderFloatVectorValues[readerIdx] = values; - // For each vector in this reader - KnnVectorValues.DocIndexIterator it = values.iterator(); - - for (int docId = it.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = it.nextDoc()) { - if (docMaps[readerIdx].get(docId) == -1) { - log.warn( - "Document {} in reader {} is not mapped to a global ordinal from the merge docMaps. Will skip this document for now", - docId, - readerIdx - ); - } else { - // Mapping from ravv ordinals to [readerIndex, readerOrd] - // Map graph node id to ravv ordinal - // Map graph node id to doc id - final int newGlobalDocId = docMaps[readerIdx].get(docId); - final int ravvLocalOrd = it.index(); - final int ravvGlobalOrd = ravvLocalOrd + baseOrds[readerIdx]; - graphNodeIdToDocIds[graphNodeId] = newGlobalDocId; - graphNodeIdsToRavvOrds[graphNodeId] = ravvGlobalOrd; - graphNodeId++; - ravvOrdToReaderMapping[ravvGlobalOrd][READER_ID] = readerIdx; // Reader index - ravvOrdToReaderMapping[ravvGlobalOrd][READER_ORD] = ravvLocalOrd; // Ordinal in reader - } - - documentsIterated++; - } - } - } else { - // If there are no deletes, we can reuse the existing graph and simply remap the ravv ordinals to the new global doc ids - // for the leading reader we must preserve the original node Ids and map them to the corresponding ravv vectors originally - // used to build the graph - // This is necessary because we are later going to expand that graph with new vectors from the other readers. - // The leading reader is ALWAYS the first one in the readers array - final JVectorFloatVectorValues leadingReaderValues = (JVectorFloatVectorValues) readers[LEADING_READER_IDX] - .getFloatVectorValues(fieldName); - perReaderFloatVectorValues[LEADING_READER_IDX] = leadingReaderValues; - var leadingReaderIt = leadingReaderValues.iterator(); - for (int docId = leadingReaderIt.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = leadingReaderIt.nextDoc()) { - final int newGlobalDocId = docMaps[LEADING_READER_IDX].get(docId); - if (newGlobalDocId == -1) { - log.warn( - "Document {} in reader {} is not mapped to a global ordinal from the merge docMaps. 
Will skip this document for now", - docId, - LEADING_READER_IDX - ); - } else { - final int ravvLocalOrd = leadingReaderIt.index(); - final int ravvGlobalOrd = ravvLocalOrd + baseOrds[LEADING_READER_IDX]; - graphNodeIdToDocIds[ravvLocalOrd] = newGlobalDocId; - graphNodeIdsToRavvOrds[ravvLocalOrd] = ravvGlobalOrd; - graphNodeId++; - ravvOrdToReaderMapping[ravvGlobalOrd][READER_ID] = LEADING_READER_IDX; // Reader index - ravvOrdToReaderMapping[ravvGlobalOrd][READER_ORD] = ravvLocalOrd; // Ordinal in reader - } - - documentsIterated++; - } - - // For the remaining readers we map the graph node id to the ravv ordinal in the order they appear - for (int readerIdx = 1; readerIdx < readers.length; readerIdx++) { - final JVectorFloatVectorValues values = (JVectorFloatVectorValues) readers[readerIdx].getFloatVectorValues(fieldName); - perReaderFloatVectorValues[readerIdx] = values; - // For each vector in this reader - KnnVectorValues.DocIndexIterator it = values.iterator(); - - for (int docId = it.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = it.nextDoc()) { - if (docMaps[readerIdx].get(docId) == -1) { - log.warn( - "Document {} in reader {} is not mapped to a global ordinal from the merge docMaps. Will skip this document for now", - docId, - readerIdx - ); - } else { - // Mapping from ravv ordinals to [readerIndex, readerOrd] - // Map graph node id to ravv ordinal - // Map graph node id to doc id - final int newGlobalDocId = docMaps[readerIdx].get(docId); - final int ravvLocalOrd = it.index(); - final int ravvGlobalOrd = ravvLocalOrd + baseOrds[readerIdx]; - graphNodeIdToDocIds[graphNodeId] = newGlobalDocId; - graphNodeIdsToRavvOrds[graphNodeId] = ravvGlobalOrd; - graphNodeId++; - ravvOrdToReaderMapping[ravvGlobalOrd][READER_ID] = readerIdx; // Reader index - ravvOrdToReaderMapping[ravvGlobalOrd][READER_ORD] = ravvLocalOrd; // Ordinal in reader - } - - documentsIterated++; - } - } - } - - if (documentsIterated < totalVectorsCount) { - throw new IllegalStateException( - "More documents were expected than what was found in the readers." - + "Expected at least number of total vectors: " - + totalVectorsCount - + " but found only: " - + documentsIterated - + " documents." - ); - } - - this.graphNodeIdToDocMap = new GraphNodeIdToDocMap(graphNodeIdToDocIds); - log.debug("Created RandomAccessMergedFloatVectorValues with {} total vectors from {} readers", size, readers.length); - - } - - /** - * Merges the float vector values from multiple readers into a unified structure. - * This process includes handling product quantization (PQ) for vector compression, - * generating ord-to-doc mappings, and writing the merged index into a new segment file. - *

- * The method determines if pre-existing product quantization codebooks are available - * from the leading reader. If available, it refines them using remaining vectors - * from other readers in the merge. If no pre-existing codebooks are found and - * the total vector count meets the required minimum threshold, new codebooks - * and compressed vectors are computed. Otherwise, no PQ compression is applied. - *

- * Also, it generates a mapping of ordinals to document IDs by iterating through - * the provided vector data, which is further used to write the field data. - *

- * In the event of no deletes or quantization, the graph construction is done by incrementally adding vectors from smaller segments into the largest segment. - * For all other cases, we build a new graph from scratch from all the vectors. - * - * TODO: Add support for incremental graph building with quantization see issue - * - * @throws IOException if there is an issue during reading or writing vector data. - */ - public void merge() throws IOException { - // This section creates the PQVectors to be used for this merge - // Get PQ compressor for leading reader - final int totalVectorsCount = size; - final String fieldName = fieldInfo.name; - final PQVectors pqVectors; - final OnHeapGraphIndex graph; - // Get the leading reader - PerFieldKnnVectorsFormat.FieldsReader fieldsReader = (PerFieldKnnVectorsFormat.FieldsReader) readers[LEADING_READER_IDX]; - JVectorReader leadingReader = (JVectorReader) fieldsReader.getFieldReader(fieldName); - final BuildScoreProvider buildScoreProvider; - // Check if the leading reader has pre-existing PQ codebooks and if so, refine them with the remaining vectors - if (leadingReader.getProductQuantizationForField(fieldInfo.name).isEmpty()) { - // No pre-existing codebooks, check if we have enough vectors to trigger quantization - log.info( - "No Pre-existing PQ codebooks found in this merge for field {} in segment {}, will check if a new codebooks is necessary", - fieldName, - mergeState.segmentInfo.name - ); - if (this.size() >= minimumBatchSizeForQuantization) { - log.info( - "Calculating new codebooks and compressed vectors for field: {}, with totalVectorCount: {}, above minimumBatchSizeForQuantization: {}", - fieldName, - totalVectorsCount, - minimumBatchSizeForQuantization - ); - pqVectors = getPQVectors(graphNodeIdsToRavvOrds, this, fieldInfo); - } else { - log.info( - "Not enough vectors found for field: {}, totalVectorCount: {}, is below minimumBatchSizeForQuantization: {}", - fieldName, - totalVectorsCount, - minimumBatchSizeForQuantization - ); - pqVectors = null; - } - } else { - log.info( - "Pre-existing PQ codebooks found in this merge for field {} in segment {}, will refine the codebooks from the leading reader with the remaining vectors", - fieldName, - mergeState.segmentInfo.name - ); - final long start = Clock.systemDefaultZone().millis(); - ProductQuantization leadingCompressor = leadingReader.getProductQuantizationForField(fieldName).get(); - // Refine the leadingCompressor with the remaining vectors in the merge, we skip the leading reader since it's already been - // used to create the leadingCompressor - // We assume the leading reader is ALWAYS the first one in the readers array - for (int i = LEADING_READER_IDX + 1; i < readers.length; i++) { - final FloatVectorValues values = readers[i].getFloatVectorValues(fieldName); - final RandomAccessVectorValues randomAccessVectorValues = new RandomAccessVectorValuesOverVectorValues(values); - leadingCompressor.refine(randomAccessVectorValues); - } - final long end = Clock.systemDefaultZone().millis(); - final long trainingTime = end - start; - log.info("Refined PQ codebooks for field {}, in {} millis", fieldName, trainingTime); - KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.add(trainingTime); - pqVectors = PQVectors.encodeAndBuild( - leadingCompressor, - graphNodeIdsToRavvOrds.length, - graphNodeIdsToRavvOrds, - this, - SIMD_POOL_MERGE - ); - } - - if (pqVectors == null) { - buildScoreProvider = BuildScoreProvider.randomAccessScoreProvider( - this, - graphNodeIdsToRavvOrds, - 
getVectorSimilarityFunction(fieldInfo) - ); - // graph = getGraph(buildScoreProvider, this, newToOldOrds, fieldInfo, segmentWriteState.segmentInfo.name); - if (!deletesFound) { - final String segmentName = segmentWriteState.segmentInfo.name; - log.info( - "No deletes found, and no PQ codebooks found, expanding previous graph with additional vectors for field {} in segment {}", - fieldName, - segmentName - ); - final RandomAccessReader leadingOnHeapGraphReader = leadingReader.getNeighborsScoreCacheForField(fieldName); - final int numBaseVectors = leadingReader.getFloatVectorValues(fieldName).size(); - graph = (OnHeapGraphIndex) GraphIndexBuilder.buildAndMergeNewNodes( - leadingOnHeapGraphReader, - this, - buildScoreProvider, - numBaseVectors, - graphNodeIdsToRavvOrds, - beamWidth, - degreeOverflow, - alpha, - hierarchyEnabled - ); - } else { - log.info("Deletes found, and no PQ codebooks found, building new graph from scratch"); - graph = getGraph( - buildScoreProvider, - this, - graphNodeIdsToRavvOrds, - fieldInfo, - segmentWriteState.segmentInfo.name, - SIMD_POOL_MERGE - ); - } - } else { - log.info("PQ codebooks found, building graph from scratch with PQ vectors"); - buildScoreProvider = BuildScoreProvider.pqBuildScoreProvider(getVectorSimilarityFunction(fieldInfo), pqVectors); - // Pre-init the diversity provider here to avoid doing it lazily (as it could block the SIMD threads) - buildScoreProvider.diversityProviderFor(0); - graph = getGraph( - buildScoreProvider, - this, - graphNodeIdsToRavvOrds, - fieldInfo, - segmentWriteState.segmentInfo.name, - SIMD_POOL_MERGE - ); - } - - writeField(fieldInfo, this, pqVectors, graphNodeIdsToRavvOrds, graphNodeIdToDocMap, graph); + public void merge() throws IOException { + // This section creates the PQVectors to be used for this merge + // Get PQ compressor for leading reader + final int totalVectorsCount = size; + final String fieldName = fieldInfo.name; + final PQVectors pqVectors; + final OnHeapGraphIndex graph; + // Get the leading reader + PerFieldKnnVectorsFormat.FieldsReader fieldsReader = + (PerFieldKnnVectorsFormat.FieldsReader) readers[LEADING_READER_IDX]; + JVectorReader leadingReader = (JVectorReader) fieldsReader.getFieldReader(fieldName); + final BuildScoreProvider buildScoreProvider; + // Check if the leading reader has pre-existing PQ codebooks and if so, refine them with the + // remaining vectors + if (leadingReader.getProductQuantizationForField(fieldInfo.name).isEmpty()) { + // No pre-existing codebooks, check if we have enough vectors to trigger quantization + log.info( + "No Pre-existing PQ codebooks found in this merge for field {} in segment {}, will check if a new codebooks is necessary", + fieldName, + mergeState.segmentInfo.name); + if (this.size() >= minimumBatchSizeForQuantization) { + log.info( + "Calculating new codebooks and compressed vectors for field: {}, with totalVectorCount: {}, above minimumBatchSizeForQuantization: {}", + fieldName, + totalVectorsCount, + minimumBatchSizeForQuantization); + pqVectors = getPQVectors(graphNodeIdsToRavvOrds, this, fieldInfo); + } else { + log.info( + "Not enough vectors found for field: {}, totalVectorCount: {}, is below minimumBatchSizeForQuantization: {}", + fieldName, + totalVectorsCount, + minimumBatchSizeForQuantization); + pqVectors = null; } - - @Override - public int size() { - return size; + } else { + log.info( + "Pre-existing PQ codebooks found in this merge for field {} in segment {}, will refine the codebooks from the leading reader with the remaining vectors", + 
fieldName, + mergeState.segmentInfo.name); + final long start = Clock.systemDefaultZone().millis(); + ProductQuantization leadingCompressor = + leadingReader.getProductQuantizationForField(fieldName).get(); + // Refine the leadingCompressor with the remaining vectors in the merge, we skip the leading + // reader since it's already been + // used to create the leadingCompressor + // We assume the leading reader is ALWAYS the first one in the readers array + for (int i = LEADING_READER_IDX + 1; i < readers.length; i++) { + final FloatVectorValues values = readers[i].getFloatVectorValues(fieldName); + final RandomAccessVectorValues randomAccessVectorValues = + new RandomAccessVectorValuesOverVectorValues(values); + leadingCompressor.refine(randomAccessVectorValues); } - - @Override - public int dimension() { - return dimension; + final long end = Clock.systemDefaultZone().millis(); + final long trainingTime = end - start; + log.info("Refined PQ codebooks for field {}, in {} millis", fieldName, trainingTime); + KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.add(trainingTime); + pqVectors = + PQVectors.encodeAndBuild( + leadingCompressor, + graphNodeIdsToRavvOrds.length, + graphNodeIdsToRavvOrds, + this, + SIMD_POOL_MERGE); + } + + if (pqVectors == null) { + buildScoreProvider = + BuildScoreProvider.randomAccessScoreProvider( + this, graphNodeIdsToRavvOrds, getVectorSimilarityFunction(fieldInfo)); + // graph = getGraph(buildScoreProvider, this, newToOldOrds, fieldInfo, + // segmentWriteState.segmentInfo.name); + if (!deletesFound) { + final String segmentName = segmentWriteState.segmentInfo.name; + log.info( + "No deletes found, and no PQ codebooks found, expanding previous graph with additional vectors for field {} in segment {}", + fieldName, + segmentName); + final RandomAccessReader leadingOnHeapGraphReader = + leadingReader.getNeighborsScoreCacheForField(fieldName); + final int numBaseVectors = leadingReader.getFloatVectorValues(fieldName).size(); + graph = + (OnHeapGraphIndex) + GraphIndexBuilder.buildAndMergeNewNodes( + leadingOnHeapGraphReader, + this, + buildScoreProvider, + numBaseVectors, + graphNodeIdsToRavvOrds, + beamWidth, + degreeOverflow, + alpha, + hierarchyEnabled); + } else { + log.info("Deletes found, and no PQ codebooks found, building new graph from scratch"); + graph = + getGraph( + buildScoreProvider, + this, + graphNodeIdsToRavvOrds, + fieldInfo, + segmentWriteState.segmentInfo.name, + SIMD_POOL_MERGE); } + } else { + log.info("PQ codebooks found, building graph from scratch with PQ vectors"); + buildScoreProvider = + BuildScoreProvider.pqBuildScoreProvider( + getVectorSimilarityFunction(fieldInfo), pqVectors); + // Pre-init the diversity provider here to avoid doing it lazily (as it could block the SIMD + // threads) + buildScoreProvider.diversityProviderFor(0); + graph = + getGraph( + buildScoreProvider, + this, + graphNodeIdsToRavvOrds, + fieldInfo, + segmentWriteState.segmentInfo.name, + SIMD_POOL_MERGE); + } - @Override - public VectorFloat getVector(int ord) { - if (ord < 0 || ord >= totalDocsCount) { - throw new IllegalArgumentException("Ordinal out of bounds: " + ord); - } + writeField(fieldInfo, this, pqVectors, graphNodeIdsToRavvOrds, graphNodeIdToDocMap, graph); + } - final int readerIdx = ravvOrdToReaderMapping[ord][READER_ID]; - final int readerOrd = ravvOrdToReaderMapping[ord][READER_ORD]; + @Override + public int size() { + return size; + } - // Access to float values is not thread safe - synchronized (perReaderFloatVectorValues[readerIdx]) { - return 
perReaderFloatVectorValues[readerIdx].vectorFloatValue(readerOrd); - } - } + @Override + public int dimension() { + return dimension; + } - @Override - public boolean isValueShared() { - return false; - } + @Override + public VectorFloat getVector(int ord) { + if (ord < 0 || ord >= totalDocsCount) { + throw new IllegalArgumentException("Ordinal out of bounds: " + ord); + } + + final int readerIdx = ravvOrdToReaderMapping[ord][READER_ID]; + final int readerOrd = ravvOrdToReaderMapping[ord][READER_ORD]; + + // Access to float values is not thread safe + synchronized (perReaderFloatVectorValues[readerIdx]) { + return perReaderFloatVectorValues[readerIdx].vectorFloatValue(readerOrd); + } + } - @Override - public RandomAccessVectorValues copy() { - throw new UnsupportedOperationException("Copy not supported"); - } + @Override + public boolean isValueShared() { + return false; } - /** - * This method will return the graph index for the field - * @return OnHeapGraphIndex - */ - public OnHeapGraphIndex getGraph( - BuildScoreProvider buildScoreProvider, - RandomAccessVectorValues randomAccessVectorValues, - int[] newToOldOrds, - FieldInfo fieldInfo, - String segmentName, - ForkJoinPool SIMD_POOL - ) { - final GraphIndexBuilder graphIndexBuilder = new GraphIndexBuilder( + @Override + public RandomAccessVectorValues copy() { + throw new UnsupportedOperationException("Copy not supported"); + } + } + + /** + * This method will return the graph index for the field + * + * @return OnHeapGraphIndex + */ + public OnHeapGraphIndex getGraph( + BuildScoreProvider buildScoreProvider, + RandomAccessVectorValues randomAccessVectorValues, + int[] newToOldOrds, + FieldInfo fieldInfo, + String segmentName, + ForkJoinPool SIMD_POOL) { + final GraphIndexBuilder graphIndexBuilder = + new GraphIndexBuilder( buildScoreProvider, fieldInfo.getVectorDimension(), maxConn, beamWidth, degreeOverflow, alpha, - hierarchyEnabled - ); - - /* - * We cannot always use randomAccessVectorValues for the graph building - * because it's size will not always correspond to the document count. - * To have the right mapping from docId to vector ordinal we need to use the mergedFloatVector. - * This is the case when we are merging segments and we might have more documents than vectors. - */ - final long start = Clock.systemDefaultZone().millis(); - final OnHeapGraphIndex graphIndex; - var vv = randomAccessVectorValues.threadLocalSupplier(); - - log.info("Building graph from merged float vector"); - // parallel graph construction from the merge documents Ids - SIMD_POOL.submit(() -> IntStream.range(0, newToOldOrds.length).parallel().forEach(ord -> { - graphIndexBuilder.addGraphNode(ord, vv.get().getVector(newToOldOrds[ord])); - })).join(); - graphIndexBuilder.cleanup(); - graphIndex = (OnHeapGraphIndex) graphIndexBuilder.getGraph(); - final long end = Clock.systemDefaultZone().millis(); + hierarchyEnabled); - log.info("Built graph for field {} in segment {} in {} millis", fieldInfo.name, segmentName, end - start); - return graphIndex; + /* + * We cannot always use randomAccessVectorValues for the graph building + * because it's size will not always correspond to the document count. + * To have the right mapping from docId to vector ordinal we need to use the mergedFloatVector. + * This is the case when we are merging segments and we might have more documents than vectors. 
+ */ + final long start = Clock.systemDefaultZone().millis(); + final OnHeapGraphIndex graphIndex; + var vv = randomAccessVectorValues.threadLocalSupplier(); + + log.info("Building graph from merged float vector"); + // parallel graph construction from the merge documents Ids + SIMD_POOL + .submit( + () -> + IntStream.range(0, newToOldOrds.length) + .parallel() + .forEach( + ord -> { + graphIndexBuilder.addGraphNode( + ord, vv.get().getVector(newToOldOrds[ord])); + })) + .join(); + graphIndexBuilder.cleanup(); + graphIndex = (OnHeapGraphIndex) graphIndexBuilder.getGraph(); + final long end = Clock.systemDefaultZone().millis(); + + log.info( + "Built graph for field {} in segment {} in {} millis", + fieldInfo.name, + segmentName, + end - start); + return graphIndex; + } + + static class RandomAccessVectorValuesOverVectorValues implements RandomAccessVectorValues { + private final VectorTypeSupport VECTOR_TYPE_SUPPORT = + VectorizationProvider.getInstance().getVectorTypeSupport(); + private final FloatVectorValues values; + + public RandomAccessVectorValuesOverVectorValues(FloatVectorValues values) { + this.values = values; } - static class RandomAccessVectorValuesOverVectorValues implements RandomAccessVectorValues { - private final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport(); - private final FloatVectorValues values; - - public RandomAccessVectorValuesOverVectorValues(FloatVectorValues values) { - this.values = values; - } - - @Override - public int size() { - return values.size(); - } - - @Override - public int dimension() { - return values.dimension(); - } + @Override + public int size() { + return values.size(); + } - @Override - public VectorFloat getVector(int nodeId) { - try { - // Access to float values is not thread safe - synchronized (this) { - final float[] vector = values.vectorValue(nodeId); - final float[] copy = new float[vector.length]; - System.arraycopy(vector, 0, copy, 0, vector.length); - return VECTOR_TYPE_SUPPORT.createFloatVector(copy); - } - } catch (IOException e) { - log.error("Error retrieving vector at ordinal {}", nodeId, e); - throw new RuntimeException(e); - } - } + @Override + public int dimension() { + return values.dimension(); + } - @Override - public boolean isValueShared() { - return false; + @Override + public VectorFloat getVector(int nodeId) { + try { + // Access to float values is not thread safe + synchronized (this) { + final float[] vector = values.vectorValue(nodeId); + final float[] copy = new float[vector.length]; + System.arraycopy(vector, 0, copy, 0, vector.length); + return VECTOR_TYPE_SUPPORT.createFloatVector(copy); } + } catch (IOException e) { + log.error("Error retrieving vector at ordinal {}", nodeId, e); + throw new RuntimeException(e); + } + } - @Override - public RandomAccessVectorValues copy() { - throw new UnsupportedOperationException("Copy not supported"); - } + @Override + public boolean isValueShared() { + return false; } + @Override + public RandomAccessVectorValues copy() { + throw new UnsupportedOperationException("Copy not supported"); + } + } } diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java index b562e52fd4a1..b2f2ea075d3d 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java @@ -17,7 +17,16 @@ 
package org.opensearch.knn.index.codec.jvector; +import static org.opensearch.knn.common.KNNConstants.DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; +import static org.opensearch.knn.index.engine.CommonTestUtils.getCodec; + import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; +import java.io.IOException; +import java.nio.file.Path; +import java.util.*; +import java.util.concurrent.*; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicInteger; import lombok.extern.log4j.Log4j2; import org.apache.lucene.document.*; import org.apache.lucene.index.*; @@ -33,1537 +42,1635 @@ import org.opensearch.knn.index.ThreadLeakFiltersForTests; import org.opensearch.knn.plugin.stats.KNNCounter; -import java.io.IOException; -import java.nio.file.Path; -import java.util.*; -import java.util.concurrent.*; -import java.util.concurrent.atomic.AtomicBoolean; -import java.util.concurrent.atomic.AtomicInteger; - -import static org.opensearch.knn.common.KNNConstants.DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; -import static org.opensearch.knn.index.engine.CommonTestUtils.getCodec; - -/** - * Test used specifically for JVector - */ -// Currently {@link IndexGraphBuilder} is using the default ForkJoinPool.commonPool() which is not being shutdown. +/** Test used specifically for JVector */ +// Currently {@link IndexGraphBuilder} is using the default ForkJoinPool.commonPool() which is not +// being shutdown. // Ignore thread leaks until we remove the ForkJoinPool.commonPool() usage from IndexGraphBuilder -// TODO: Wire the execution thread pool to {@link IndexGraphBuilder} to avoid the failure of the UT due to leaked thread pool warning. -@ThreadLeakFilters(defaultFilters = true, filters = { ThreadLeakFiltersForTests.class }) +// TODO: Wire the execution thread pool to {@link IndexGraphBuilder} to avoid the failure of the UT +// due to leaked thread pool warning. +@ThreadLeakFilters( + defaultFilters = true, + filters = {ThreadLeakFiltersForTests.class}) @LuceneTestCase.SuppressSysoutChecks(bugUrl = "") @Log4j2 public class KNNJVectorTests extends LuceneTestCase { - private static final String TEST_FIELD = "test_field"; - private static final String TEST_ID_FIELD = "id"; - - /** - * Test to verify that the JVector codec is able to successfully search for the nearest neighbours - * in the index. - * Single field is used to store the vectors. - * All the documents are stored in a single segment. - * Single commit without refreshing the index. - * No merge. 
- */ - @Test - public void testJVectorKnnIndex_simpleCase() throws IOException { - int k = 3; // The number of nearest neighbors to gather - int totalNumberOfDocs = 10; - IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); - indexWriterConfig.setUseCompoundFile(false); - indexWriterConfig.setCodec(getCodec()); - indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); - final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); - try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { - final float[] target = new float[] { 0.0f, 0.0f }; - for (int i = 1; i < totalNumberOfDocs + 1; i++) { - final float[] source = new float[] { 0.0f, 1.0f / i }; - final Document doc = new Document(); - doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); - w.addDocument(doc); - } - log.info("Flushing docs to make them discoverable on the file system"); - w.commit(); - - try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have a single segment with 10 documents"); - Assert.assertEquals(1, reader.getContext().leaves().size()); - Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); - - final Query filterQuery = new MatchAllDocsQuery(); - final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); - TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); - assertEquals(k, topDocs.totalHits.value()); - assertEquals(9, topDocs.scoreDocs[0].doc); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 10.0f }), - topDocs.scoreDocs[0].score, - 0.001f - ); - assertEquals(8, topDocs.scoreDocs[1].doc); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 9.0f }), - topDocs.scoreDocs[1].score, - 0.001f - ); - assertEquals(7, topDocs.scoreDocs[2].doc); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 8.0f }), - topDocs.scoreDocs[2].score, - 0.001f - ); - log.info("successfully completed search tests"); - } - } - log.info("successfully closed directory"); + private static final String TEST_FIELD = "test_field"; + private static final String TEST_ID_FIELD = "id"; + + /** + * Test to verify that the JVector codec is able to successfully search for the nearest neighbours + * in the index. Single field is used to store the vectors. All the documents are stored in a + * single segment. Single commit without refreshing the index. No merge. 
+ */ + @Test + public void testJVectorKnnIndex_simpleCase() throws IOException { + int k = 3; // The number of nearest neighbors to gather + int totalNumberOfDocs = 10; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec()); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); + IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = new float[] {0.0f, 0.0f}; + for (int i = 1; i < totalNumberOfDocs + 1; i++) { + final float[] source = new float[] {0.0f, 1.0f / i}; + final Document doc = new Document(); + doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); + w.addDocument(doc); + } + log.info("Flushing docs to make them discoverable on the file system"); + w.commit(); + + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have a single segment with 10 documents"); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = + getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + assertEquals(9, topDocs.scoreDocs[0].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 1.0f / 10.0f}), + topDocs.scoreDocs[0].score, + 0.001f); + assertEquals(8, topDocs.scoreDocs[1].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 1.0f / 9.0f}), + topDocs.scoreDocs[1].score, + 0.001f); + assertEquals(7, topDocs.scoreDocs[2].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 1.0f / 8.0f}), + topDocs.scoreDocs[2].score, + 0.001f); + log.info("successfully completed search tests"); + } } - - /** - * Test the scenario when not all documents are populated with the vector field - */ - public void testMissing_fields() throws IOException { - final int k = 3; // The number of nearest neighbors to gather - final int totalNumberOfDocs = 10; - final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; - IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); - indexWriterConfig.setUseCompoundFile(false); - indexWriterConfig.setCodec(getCodec()); - indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); - final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); - try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { - final float[] target = new float[] { 0.0f, 0.0f }; - for (int i = 0; i < totalNumberOfDocs; i++) { - final Document doc = new Document(); - if (i % 2 == 0) { - final float[] source = new float[] { 0.0f, i }; - doc.add(new KnnFloatVectorField("test_field", source, vectorSimilarityFunction)); - } - doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); - w.addDocument(doc); - } - log.info("Flushing docs to make them discoverable on the file system"); - w.commit(); - - try (IndexReader reader = DirectoryReader.open(w)) { - 
log.info("We should now have a single segment with 10 documents"); - Assert.assertEquals(1, reader.getContext().leaves().size()); - Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); - - final Query filterQuery = new MatchAllDocsQuery(); - final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); - TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); - assertEquals(k, topDocs.totalHits.value()); - assertEquals(0, topDocs.scoreDocs[0].doc); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 0.0f }), - topDocs.scoreDocs[0].score, - 0.001f - ); - assertEquals(2, topDocs.scoreDocs[1].doc); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 2.0f }), - topDocs.scoreDocs[1].score, - 0.001f - ); - assertEquals(4, topDocs.scoreDocs[2].doc); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 4.0f }), - topDocs.scoreDocs[2].score, - 0.001f - ); - log.info("successfully completed search tests"); - } + log.info("successfully closed directory"); + } + + /** Test the scenario when not all documents are populated with the vector field */ + public void testMissing_fields() throws IOException { + final int k = 3; // The number of nearest neighbors to gather + final int totalNumberOfDocs = 10; + final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec()); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); + IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = new float[] {0.0f, 0.0f}; + for (int i = 0; i < totalNumberOfDocs; i++) { + final Document doc = new Document(); + if (i % 2 == 0) { + final float[] source = new float[] {0.0f, i}; + doc.add(new KnnFloatVectorField("test_field", source, vectorSimilarityFunction)); } - log.info("successfully closed directory"); + doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); + w.addDocument(doc); + } + log.info("Flushing docs to make them discoverable on the file system"); + w.commit(); + + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have a single segment with 10 documents"); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = + getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + assertEquals(0, topDocs.scoreDocs[0].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 0.0f}), + topDocs.scoreDocs[0].score, + 0.001f); + assertEquals(2, topDocs.scoreDocs[1].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 2.0f}), + topDocs.scoreDocs[1].score, + 0.001f); + assertEquals(4, topDocs.scoreDocs[2].doc); + Assert.assertEquals( + 
VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 4.0f}), + topDocs.scoreDocs[2].score, + 0.001f); + log.info("successfully completed search tests"); + } } - - /** - * Test the scenario when the index is sorted by a doc value - * We want to make sure the docIDs are correctly mapped to the jVector ordinals - * @throws IOException if an I/O error occurs - */ - public void test_sorted_index() throws IOException { - final int k = 3; // The number of nearest neighbors to gather - final int totalNumberOfDocs = 10; - final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; - final String sortFieldName = "sorted_field"; - IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); - indexWriterConfig.setUseCompoundFile(false); - indexWriterConfig.setCodec(getCodec()); - indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); - // Add index sorting configuration - indexWriterConfig.setIndexSort(new Sort(new SortField(sortFieldName, SortField.Type.INT, true))); // true = reverse order - - final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); - try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { - final float[] target = new float[] { 0.0f, 0.0f }; - for (int i = 0; i < totalNumberOfDocs; i++) { - final Document doc = new Document(); - final float[] source = new float[] { 0.0f, i }; - doc.add(new KnnFloatVectorField("test_field", source, vectorSimilarityFunction)); - doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); - // Add the sortable field - doc.add(new NumericDocValuesField(sortFieldName, i)); - w.addDocument(doc); - } - log.info("Flushing docs to make them discoverable on the file system"); - w.commit(); - - try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have a single segment with 10 documents"); - Assert.assertEquals(1, reader.getContext().leaves().size()); - Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); - - final Query filterQuery = new MatchAllDocsQuery(); - final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); - TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); - assertEquals(k, topDocs.totalHits.value()); - assertEquals(9, topDocs.scoreDocs[0].doc); - assertEquals(0, reader.storedFields().document(topDocs.scoreDocs[0].doc).getField(TEST_ID_FIELD).numericValue().intValue()); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 0.0f }), - topDocs.scoreDocs[0].score, - 0.001f - ); - assertEquals(8, topDocs.scoreDocs[1].doc); - assertEquals(1, reader.storedFields().document(topDocs.scoreDocs[1].doc).getField(TEST_ID_FIELD).numericValue().intValue()); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f }), - topDocs.scoreDocs[1].score, - 0.001f - ); - assertEquals(7, topDocs.scoreDocs[2].doc); - assertEquals(2, reader.storedFields().document(topDocs.scoreDocs[2].doc).getField(TEST_ID_FIELD).numericValue().intValue()); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 2.0f }), - topDocs.scoreDocs[2].score, - 0.001f - ); - log.info("successfully completed search tests"); - } - } - log.info("successfully closed directory"); + log.info("successfully closed directory"); + } + + /** + * Test the scenario when the index is sorted 
by a doc value We want to make sure the docIDs are + * correctly mapped to the jVector ordinals + * + * @throws IOException if an I/O error occurs + */ + public void test_sorted_index() throws IOException { + final int k = 3; // The number of nearest neighbors to gather + final int totalNumberOfDocs = 10; + final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; + final String sortFieldName = "sorted_field"; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec()); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + // Add index sorting configuration + indexWriterConfig.setIndexSort( + new Sort(new SortField(sortFieldName, SortField.Type.INT, true))); // true = reverse order + + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); + IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = new float[] {0.0f, 0.0f}; + for (int i = 0; i < totalNumberOfDocs; i++) { + final Document doc = new Document(); + final float[] source = new float[] {0.0f, i}; + doc.add(new KnnFloatVectorField("test_field", source, vectorSimilarityFunction)); + doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); + // Add the sortable field + doc.add(new NumericDocValuesField(sortFieldName, i)); + w.addDocument(doc); + } + log.info("Flushing docs to make them discoverable on the file system"); + w.commit(); + + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have a single segment with 10 documents"); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = + getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + assertEquals(9, topDocs.scoreDocs[0].doc); + assertEquals( + 0, + reader + .storedFields() + .document(topDocs.scoreDocs[0].doc) + .getField(TEST_ID_FIELD) + .numericValue() + .intValue()); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 0.0f}), + topDocs.scoreDocs[0].score, + 0.001f); + assertEquals(8, topDocs.scoreDocs[1].doc); + assertEquals( + 1, + reader + .storedFields() + .document(topDocs.scoreDocs[1].doc) + .getField(TEST_ID_FIELD) + .numericValue() + .intValue()); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 1.0f}), + topDocs.scoreDocs[1].score, + 0.001f); + assertEquals(7, topDocs.scoreDocs[2].doc); + assertEquals( + 2, + reader + .storedFields() + .document(topDocs.scoreDocs[2].doc) + .getField(TEST_ID_FIELD) + .numericValue() + .intValue()); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 2.0f}), + topDocs.scoreDocs[2].score, + 0.001f); + log.info("successfully completed search tests"); + } } - - /** - * Test to verify that the JVector codec is able to successfully search for the nearest neighbours - * in the index. - * Single field is used to store the vectors. - * Documents are stored in a multiple segments. - * Multiple commits without refreshing the index. - * No merge. 
- */ - @Test - public void testJVectorKnnIndex_multipleSegments() throws IOException { - int k = 3; // The number of nearest neighbours to gather - int totalNumberOfDocs = 10; - IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); - indexWriterConfig.setUseCompoundFile(false); - indexWriterConfig.setCodec(getCodec()); - indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(false)); - final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); - try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { - final float[] target = new float[] { 0.0f, 0.0f }; - for (int i = 1; i < totalNumberOfDocs + 1; i++) { - final float[] source = new float[] { 0.0f, 1.0f / i }; - final Document doc = new Document(); - doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); - w.addDocument(doc); - w.commit(); // this creates a new segment - } - log.info("Done writing all files to the file system"); - - try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have 10 segments, each with a single document"); - Assert.assertEquals(10, reader.getContext().leaves().size()); - Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); - final Query filterQuery = new MatchAllDocsQuery(); - final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = new KnnFloatVectorQuery("test_field", target, k, filterQuery); - TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); - assertEquals(k, topDocs.totalHits.value()); - assertEquals(9, topDocs.scoreDocs[0].doc); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 10.0f }), - topDocs.scoreDocs[0].score, - 0.001f - ); - assertEquals(8, topDocs.scoreDocs[1].doc); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 9.0f }), - topDocs.scoreDocs[1].score, - 0.001f - ); - assertEquals(7, topDocs.scoreDocs[2].doc); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 8.0f }), - topDocs.scoreDocs[2].score, - 0.001f - ); - log.info("successfully completed search tests"); - } - } + log.info("successfully closed directory"); + } + + /** + * Test to verify that the JVector codec is able to successfully search for the nearest neighbours + * in the index. Single field is used to store the vectors. Documents are stored in a multiple + * segments. Multiple commits without refreshing the index. No merge. 
+ */ + @Test + public void testJVectorKnnIndex_multipleSegments() throws IOException { + int k = 3; // The number of nearest neighbours to gather + int totalNumberOfDocs = 10; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec()); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(false)); + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); + IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = new float[] {0.0f, 0.0f}; + for (int i = 1; i < totalNumberOfDocs + 1; i++) { + final float[] source = new float[] {0.0f, 1.0f / i}; + final Document doc = new Document(); + doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); + w.addDocument(doc); + w.commit(); // this creates a new segment + } + log.info("Done writing all files to the file system"); + + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have 10 segments, each with a single document"); + Assert.assertEquals(10, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = + new KnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + assertEquals(9, topDocs.scoreDocs[0].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 1.0f / 10.0f}), + topDocs.scoreDocs[0].score, + 0.001f); + assertEquals(8, topDocs.scoreDocs[1].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 1.0f / 9.0f}), + topDocs.scoreDocs[1].score, + 0.001f); + assertEquals(7, topDocs.scoreDocs[2].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 1.0f / 8.0f}), + topDocs.scoreDocs[2].score, + 0.001f); + log.info("successfully completed search tests"); + } } - - /** - * Test to verify that the JVector codec is able to successfully search for the nearest neighbours - * in the index. - * Single field is used to store the vectors. - * Documents are stored in a multiple segments. - * Multiple commits without refreshing the index. - * Merge is enabled. 
- */ - @Test - public void testJVectorKnnIndex_mergeEnabled() throws IOException { - int k = 3; // The number of nearest neighbours to gather - int totalNumberOfDocs = 10; - IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); - indexWriterConfig.setUseCompoundFile(false); - indexWriterConfig.setCodec(getCodec()); - indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); - indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); - final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); - try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { - final float[] target = new float[] { 0.0f, 0.0f }; - for (int i = 1; i < totalNumberOfDocs + 1; i++) { - final float[] source = new float[] { 0.0f, 1.0f * i }; - final Document doc = new Document(); - doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); - doc.add(new StringField("my_doc_id", Integer.toString(i, 10), Field.Store.YES)); - w.addDocument(doc); - w.commit(); // this creates a new segment without triggering a merge - } - log.info("Done writing all files to the file system"); - - w.forceMerge(1); // this merges all segments into a single segment - log.info("Done merging all segments"); - try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have 1 segment with 10 documents"); - Assert.assertEquals(1, reader.getContext().leaves().size()); - Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); - final Query filterQuery = new MatchAllDocsQuery(); - final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); - TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); - assertEquals(k, topDocs.totalHits.value()); - Document doc = reader.storedFields().document(topDocs.scoreDocs[0].doc); - assertEquals("1", doc.get("my_doc_id")); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f }), - topDocs.scoreDocs[0].score, - 0.001f - ); - doc = reader.storedFields().document(topDocs.scoreDocs[1].doc); - assertEquals("2", doc.get("my_doc_id")); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 2.0f }), - topDocs.scoreDocs[1].score, - 0.001f - ); - doc = reader.storedFields().document(topDocs.scoreDocs[2].doc); - assertEquals("3", doc.get("my_doc_id")); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 3.0f }), - topDocs.scoreDocs[2].score, - 0.001f - ); - log.info("successfully completed search tests"); - } - } + } + + /** + * Test to verify that the JVector codec is able to successfully search for the nearest neighbours + * in the index. Single field is used to store the vectors. Documents are stored in a multiple + * segments. Multiple commits without refreshing the index. Merge is enabled. 
+ */ + @Test + public void testJVectorKnnIndex_mergeEnabled() throws IOException { + int k = 3; // The number of nearest neighbours to gather + int totalNumberOfDocs = 10; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec()); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); + IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = new float[] {0.0f, 0.0f}; + for (int i = 1; i < totalNumberOfDocs + 1; i++) { + final float[] source = new float[] {0.0f, 1.0f * i}; + final Document doc = new Document(); + doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); + doc.add(new StringField("my_doc_id", Integer.toString(i, 10), Field.Store.YES)); + w.addDocument(doc); + w.commit(); // this creates a new segment without triggering a merge + } + log.info("Done writing all files to the file system"); + + w.forceMerge(1); // this merges all segments into a single segment + log.info("Done merging all segments"); + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have 1 segment with 10 documents"); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = + getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + Document doc = reader.storedFields().document(topDocs.scoreDocs[0].doc); + assertEquals("1", doc.get("my_doc_id")); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 1.0f}), + topDocs.scoreDocs[0].score, + 0.001f); + doc = reader.storedFields().document(topDocs.scoreDocs[1].doc); + assertEquals("2", doc.get("my_doc_id")); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 2.0f}), + topDocs.scoreDocs[1].score, + 0.001f); + doc = reader.storedFields().document(topDocs.scoreDocs[2].doc); + assertEquals("3", doc.get("my_doc_id")); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 3.0f}), + topDocs.scoreDocs[2].score, + 0.001f); + log.info("successfully completed search tests"); + } } - - /** - * Test to verify that the jVector codec is able to successfully search for the nearest neighbors - * in the index. - * Single field is used to store the vectors. - * Documents are stored in potentially multiple segments. - * Multiple commits. - * Multiple merges. 
- */ - @Test - public void multipleMerges() throws IOException { - int k = 3; // The number of nearest neighbours to gather - int totalNumberOfDocs = 10; - IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); - indexWriterConfig.setUseCompoundFile(false); - indexWriterConfig.setCodec(getCodec()); - indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); - indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); - final Path indexPath = createTempDir(); - final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; - log.info("Index path: {}", indexPath); - try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { - final float[] target = new float[] { 0.0f, 0.0f }; - for (int i = 1; i < totalNumberOfDocs + 1; i++) { - final float[] source = new float[] { 0.0f, 1.0f * i }; - final Document doc = new Document(); - doc.add(new KnnFloatVectorField("test_field", source, vectorSimilarityFunction)); - doc.add(new StringField("my_doc_id", Integer.toString(i, 10), Field.Store.YES)); - w.addDocument(doc); - w.commit(); // this creates a new segment without triggering a merge - w.forceMerge(1); // this merges all segments into a single segment - } - log.info("Done writing all files to the file system"); - - w.forceMerge(1); // this merges all segments into a single segment - log.info("Done merging all segments"); - try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have 1 segment with 10 documents"); - Assert.assertEquals(1, reader.getContext().leaves().size()); - Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); - final Query filterQuery = new MatchAllDocsQuery(); - final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); - TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); - assertEquals(k, topDocs.totalHits.value()); - Document doc = reader.storedFields().document(topDocs.scoreDocs[0].doc); - assertEquals("1", doc.get("my_doc_id")); - Assert.assertEquals( - vectorSimilarityFunction.compare(target, new float[] { 0.0f, 1.0f }), - topDocs.scoreDocs[0].score, - 0.001f - ); - doc = reader.storedFields().document(topDocs.scoreDocs[1].doc); - assertEquals("2", doc.get("my_doc_id")); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 2.0f }), - topDocs.scoreDocs[1].score, - 0.001f - ); - doc = reader.storedFields().document(topDocs.scoreDocs[2].doc); - assertEquals("3", doc.get("my_doc_id")); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 3.0f }), - topDocs.scoreDocs[2].score, - 0.001f - ); - log.info("successfully completed search tests"); - } - } + } + + /** + * Test to verify that the jVector codec is able to successfully search for the nearest neighbors + * in the index. Single field is used to store the vectors. Documents are stored in potentially + * multiple segments. Multiple commits. Multiple merges. 
+ */ + @Test + public void multipleMerges() throws IOException { + int k = 3; // The number of nearest neighbours to gather + int totalNumberOfDocs = 10; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec()); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); + final Path indexPath = createTempDir(); + final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); + IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = new float[] {0.0f, 0.0f}; + for (int i = 1; i < totalNumberOfDocs + 1; i++) { + final float[] source = new float[] {0.0f, 1.0f * i}; + final Document doc = new Document(); + doc.add(new KnnFloatVectorField("test_field", source, vectorSimilarityFunction)); + doc.add(new StringField("my_doc_id", Integer.toString(i, 10), Field.Store.YES)); + w.addDocument(doc); + w.commit(); // this creates a new segment without triggering a merge + w.forceMerge(1); // this merges all segments into a single segment + } + log.info("Done writing all files to the file system"); + + w.forceMerge(1); // this merges all segments into a single segment + log.info("Done merging all segments"); + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have 1 segment with 10 documents"); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = + getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + Document doc = reader.storedFields().document(topDocs.scoreDocs[0].doc); + assertEquals("1", doc.get("my_doc_id")); + Assert.assertEquals( + vectorSimilarityFunction.compare(target, new float[] {0.0f, 1.0f}), + topDocs.scoreDocs[0].score, + 0.001f); + doc = reader.storedFields().document(topDocs.scoreDocs[1].doc); + assertEquals("2", doc.get("my_doc_id")); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 2.0f}), + topDocs.scoreDocs[1].score, + 0.001f); + doc = reader.storedFields().document(topDocs.scoreDocs[2].doc); + assertEquals("3", doc.get("my_doc_id")); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 3.0f}), + topDocs.scoreDocs[2].score, + 0.001f); + log.info("successfully completed search tests"); + } } - - /** - * Test to verify that the jVector codec is able to successfully search for the nearest neighbours - * in the index. - * A Single field is used to store the vectors. - * Documents are stored in potentially multiple segments. - * Multiple commits. - * Multiple merges. 
- * Large batches - * Use a compound file - */ - @Test - public void testJVectorKnnIndex_multiple_merges_large_batches_no_quantization() throws IOException { - int segmentSize = DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; - int totalNumberOfDocs = segmentSize * 4; - int k = 3; // The number of nearest neighbors to gather - - IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); - indexWriterConfig.setUseCompoundFile(true); - indexWriterConfig.setCodec(getCodec(Integer.MAX_VALUE)); // effectively without quantization - indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(true)); - indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); - // We set the below parameters to make sure no permature flush will occur, this way we can have a single segment, and we can force - // test the quantization case - indexWriterConfig.setMaxBufferedDocs(10000); // force flush every 10000 docs, this way we make sure that we only have a single - // segment for a totalNumberOfDocs < 1000 - indexWriterConfig.setRAMPerThreadHardLimitMB(1000); // 1000MB per thread, this way we make sure that no premature flush will occur - - final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); - try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { - final float[] target = new float[] { 0.0f, 0.0f }; - for (int i = 1; i < totalNumberOfDocs + 1; i++) { - final float[] source = new float[] { 0.0f, 1.0f / i }; - final Document doc = new Document(); - doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); - doc.add(new StringField("my_doc_id", Integer.toString(i, 10), Field.Store.YES)); - w.addDocument(doc); - if (i % segmentSize == 0) { - w.commit(); // this creates a new segment without triggering a merge - } - } - log.info("Done writing all files to the file system"); - - w.forceMerge(1); // this merges all segments into a single segment - log.info("Done merging all segments"); - try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have 1 segment with {} documents", totalNumberOfDocs); - Assert.assertEquals(1, reader.getContext().leaves().size()); - Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); - final Query filterQuery = new MatchAllDocsQuery(); - final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); - TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); - assertEquals(k, topDocs.totalHits.value()); - - float expectedMinScoreInTopK = VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, k }); - final float recall = calculateRecall(topDocs, expectedMinScoreInTopK); - Assert.assertEquals(1.0f, recall, 0.01f); - - log.info("successfully completed search tests"); - } + } + + /** + * Test to verify that the jVector codec is able to successfully search for the nearest neighbours + * in the index. A Single field is used to store the vectors. Documents are stored in potentially + * multiple segments. Multiple commits. Multiple merges. 
Large batches Use a compound file + */ + @Test + public void testJVectorKnnIndex_multiple_merges_large_batches_no_quantization() + throws IOException { + int segmentSize = DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; + int totalNumberOfDocs = segmentSize * 4; + int k = 3; // The number of nearest neighbors to gather + + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(true); + indexWriterConfig.setCodec(getCodec(Integer.MAX_VALUE)); // effectively without quantization + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(true)); + indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); + // We set the below parameters to make sure no permature flush will occur, this way we can have + // a single segment, and we can force + // test the quantization case + indexWriterConfig.setMaxBufferedDocs( + 10000); // force flush every 10000 docs, this way we make sure that we only have a single + // segment for a totalNumberOfDocs < 1000 + indexWriterConfig.setRAMPerThreadHardLimitMB( + 1000); // 1000MB per thread, this way we make sure that no premature flush will occur + + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); + IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = new float[] {0.0f, 0.0f}; + for (int i = 1; i < totalNumberOfDocs + 1; i++) { + final float[] source = new float[] {0.0f, 1.0f / i}; + final Document doc = new Document(); + doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); + doc.add(new StringField("my_doc_id", Integer.toString(i, 10), Field.Store.YES)); + w.addDocument(doc); + if (i % segmentSize == 0) { + w.commit(); // this creates a new segment without triggering a merge } + } + log.info("Done writing all files to the file system"); + + w.forceMerge(1); // this merges all segments into a single segment + log.info("Done merging all segments"); + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have 1 segment with {} documents", totalNumberOfDocs); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = + getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + + float expectedMinScoreInTopK = + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, k}); + final float recall = calculateRecall(topDocs, expectedMinScoreInTopK); + Assert.assertEquals(1.0f, recall, 0.01f); + + log.info("successfully completed search tests"); + } } - - /** - * Similar to testJVectorKnnIndex_multiple_merges_large_batches_no_quantization but with random vectors - * It's important to add more randomness to the vectors to make sure the graph is not linear - * @throws IOException if an I/O error occurs - */ - @Test - public void testJVectorKnnIndex_multiple_merges_large_batches_no_quantization_with_random_vectors() throws IOException { - int segmentSize = 200; - int totalNumberOfDocs = segmentSize * 4; - int k = 3; // The number of nearest neighbors to gather - final int dimension = 2; - final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; - 
final float[] target = TestUtils.generateRandomVectors(1, dimension)[0]; - final float[][] source = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); - final Set groundTruthVectorsIds = calculateGroundTruthVectorsIds(target, source, k, vectorSimilarityFunction); - - IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); - indexWriterConfig.setUseCompoundFile(true); - indexWriterConfig.setCodec(getCodec(Integer.MAX_VALUE)); // effectively without quantization - indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(true)); - indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); - // We set the below parameters to make sure no permature flush will occur, this way we can have a single segment, and we can force - // test the quantization case - indexWriterConfig.setMaxBufferedDocs(10000); // force flush every 10000 docs, this way we make sure that we only have a single - // segment for a totalNumberOfDocs < 1000 - indexWriterConfig.setRAMPerThreadHardLimitMB(1000); // 1000MB per thread, this way we make sure that no premature flush will occur - - final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); - try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { - for (int i = 0; i < source.length; i++) { - final Document doc = new Document(); - doc.add(new KnnFloatVectorField(TEST_FIELD, source[i], VectorSimilarityFunction.EUCLIDEAN)); - doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); - w.addDocument(doc); - if (i % segmentSize == 0) { - w.commit(); // this creates a new segment without triggering a merge - } - } - log.info("Done writing all files to the file system"); - - w.forceMerge(1); // this merges all segments into a single segment - log.info("Done merging all segments"); - try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have a single segment with {} documents", totalNumberOfDocs); - Assert.assertEquals(1, reader.getContext().leaves().size()); - Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); - - final Query filterQuery = new MatchAllDocsQuery(); - final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery(TEST_FIELD, target, k, filterQuery); - TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); - assertEquals(k, topDocs.totalHits.value()); - final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); - Assert.assertEquals(1.0f, recall, 0.05f); - log.info("successfully completed search tests"); - } + } + + /** + * Similar to testJVectorKnnIndex_multiple_merges_large_batches_no_quantization but with random + * vectors It's important to add more randomness to the vectors to make sure the graph is not + * linear + * + * @throws IOException if an I/O error occurs + */ + @Test + public void + testJVectorKnnIndex_multiple_merges_large_batches_no_quantization_with_random_vectors() + throws IOException { + int segmentSize = 200; + int totalNumberOfDocs = segmentSize * 4; + int k = 3; // The number of nearest neighbors to gather + final int dimension = 2; + final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; + final float[] target = TestUtils.generateRandomVectors(1, dimension)[0]; + final float[][] source = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); + final Set groundTruthVectorsIds = + calculateGroundTruthVectorsIds(target, source, k, 
vectorSimilarityFunction); + + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(true); + indexWriterConfig.setCodec(getCodec(Integer.MAX_VALUE)); // effectively without quantization + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(true)); + indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); + // We set the below parameters to make sure no permature flush will occur, this way we can have + // a single segment, and we can force + // test the quantization case + indexWriterConfig.setMaxBufferedDocs( + 10000); // force flush every 10000 docs, this way we make sure that we only have a single + // segment for a totalNumberOfDocs < 1000 + indexWriterConfig.setRAMPerThreadHardLimitMB( + 1000); // 1000MB per thread, this way we make sure that no premature flush will occur + + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); + IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + for (int i = 0; i < source.length; i++) { + final Document doc = new Document(); + doc.add(new KnnFloatVectorField(TEST_FIELD, source[i], VectorSimilarityFunction.EUCLIDEAN)); + doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); + w.addDocument(doc); + if (i % segmentSize == 0) { + w.commit(); // this creates a new segment without triggering a merge } + } + log.info("Done writing all files to the file system"); + + w.forceMerge(1); // this merges all segments into a single segment + log.info("Done merging all segments"); + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have a single segment with {} documents", totalNumberOfDocs); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = + getJVectorKnnFloatVectorQuery(TEST_FIELD, target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); + Assert.assertEquals(1.0f, recall, 0.05f); + log.info("successfully completed search tests"); + } } - - /** - * Tests the functionality and integrity of a Lucene k-NN index under multiple merge cycles and verifies - * the proper ordering of vectors and document identifiers. - * - * The method performs the following validation steps: - * 1. Indexes a predefined number of documents into a Lucene index, creating many small segments. - * Each document - * includes a k-NN float vector field encoding a specific order. - * 2. Executes several merge operations on the index (partial and full merges) to validate that the merging - * process maintains correctness and consistency. - * 3. Validates the following invariants post-merge: - * (a) Verifies that the index is merged into a single segment. - * (b) Confirms the integrity of vector values by iterating through the merged segment and checking the - * relationship between vector components and document identifiers. - * (c) Performs k-NN searches with various cases: - * - Single-threaded searches using vectors to ensure correct results. 
- * - Multi-threaded concurrent searches to confirm robustness and verify the index operates correctly - * under concurrent access without exhausting file handles or encountering other issues. - * - * Assertions are used throughout to ensure the state of the index matches the expected behavior, - * validate merge - * results, and confirm the accuracy of search operations. - * The test also logs the number of successful k-NN queries - * during the concurrent search phase. - * - * @throws IOException if an I/O error occurs during index operations. - * @throws InterruptedException if the concurrent search phase is interrupted. - */ - @Test - public void testLuceneKnnIndex_multipleMerges_with_ordering_check() throws IOException, InterruptedException { - final int numDocs = 10000; - final String floatVectorField = "vec"; - final String expectedDocIdField = "expectedDocId"; - final Path indexPath = createTempDir(); - final float[][] sourceVectors = TestUtils.generateRandomVectors(numDocs, 2); - final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; - - try (Directory dir = newFSDirectory(indexPath)) { - IndexWriterConfig cfg = newIndexWriterConfig(); - cfg.setCodec(getCodec()); - cfg.setUseCompoundFile(false); - cfg.setMergePolicy(new ForceMergesOnlyMergePolicy(false)); - cfg.setMergeScheduler(new SerialMergeScheduler()); - - try (IndexWriter w = new IndexWriter(dir, cfg)) { - /* ---------- 1. index documents, create many tiny segments ---------- */ - for (int i = 0; i < numDocs; i++) { - Document doc = new Document(); - // vector whose first component encodes the future (segment-local) docID - doc.add(new KnnFloatVectorField(floatVectorField, sourceVectors[i], vectorSimilarityFunction)); - doc.add(new StoredField(expectedDocIdField, i)); - w.addDocument(doc); - } - w.commit(); - - /* ---------- 2. run several merge cycles ---------- */ - w.forceMerge(5); // partial merge - w.forceMerge(3); // another partial merge - w.forceMerge(1); // final full merge - } - - /* ---------- 3. 
open reader and assert the invariant ---------- */ - try (DirectoryReader reader = DirectoryReader.open(dir)) { - assertEquals("we merged down to exactly one segment", 1, reader.leaves().size()); - - // (a) iterate through vectors directly - for (LeafReaderContext context : reader.leaves()) { - FloatVectorValues vectorValues = context.reader().getFloatVectorValues("vec"); - final var docIdSetIterator = vectorValues.iterator(); // iterator for all the vectors with values - int docId = -1; - while ((docId = docIdSetIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { - final int luceneDocId = context.docBase + docId; - final int globalDocId = reader.storedFields() - .document(luceneDocId) - .getField(expectedDocIdField) - .storedValue() - .getIntValue(); - float[] vectorValue = vectorValues.vectorValue(docIdSetIterator.index()); - float[] expectedVectorValue = sourceVectors[globalDocId]; - // if the vectors do not match, also look which source vector should be the right result - if (!Arrays.equals(expectedVectorValue, vectorValue)) { - for (int i = 0; i < sourceVectors.length; i++) { - if (Arrays.equals(sourceVectors[i], vectorValue)) { - log.error( - "found vector with global id: {}, in docId: {}, however the actual position of the vector in source is: {}", - globalDocId, - luceneDocId, - i - ); - } - } - } - Assert.assertArrayEquals( - "vector with global id " - + globalDocId - + " in source doesn't match vector value in lucene docID " - + luceneDocId - + " on the index", - expectedVectorValue, - vectorValue, - 0.0f - ); - } - } - - // (b) search with the same vector and confirm we are not exhausting the file handles with each search - IndexSearcher searcher = newSearcher(reader); - LeafReaderContext context = reader.leaves().get(0); // we only have one leaf at this point so we can use it to obtain the - // vector values - final int baseDocId = context.docBase; - final FloatVectorValues vectorValues = context.reader().getFloatVectorValues("vec"); - final int k = 1; - for (int i = 0; i < reader.maxDoc(); i++) { - float[] query = TestUtils.generateRandomVectors(1, 2)[0]; - TopDocs td = searcher.search(getJVectorKnnFloatVectorQuery("vec", query, k, new MatchAllDocsQuery()), k); - assertEquals(k, td.scoreDocs.length); - - compareSearchResults(td, sourceVectors, reader, expectedDocIdField, baseDocId, vectorValues); - } - - // (c) search with the same vector and this time add concurrency to make sure we are still not exhausting the file handles - int numThreads = 10; // Number of concurrent search threads - int queriesPerThread = 100; // Number of searches per thread - ExecutorService executor = Executors.newFixedThreadPool(numThreads); - CountDownLatch latch = new CountDownLatch(numThreads); - AtomicBoolean failureDetected = new AtomicBoolean(false); - AtomicInteger totalQueries = new AtomicInteger(0); - - try { - for (int t = 0; t < numThreads; t++) { - executor.submit(() -> { - int i = 0; - - try { - for (i = 0; i < queriesPerThread && !failureDetected.get(); i++) { - float[] query = TestUtils.generateRandomVectors(1, 2)[0]; - try { - TopDocs td = searcher.search(new KnnFloatVectorQuery("vec", query, k), k); - assertEquals("Search should return correct number of results", k, td.scoreDocs.length); - compareSearchResults(td, sourceVectors, reader, expectedDocIdField, baseDocId, vectorValues); - totalQueries.incrementAndGet(); - } catch (Throwable e) { - failureDetected.compareAndSet(false, true); - log.error("Exception encountered", e); - fail("Exception during concurrent search: " + 
e.getMessage()); - } - } - } finally { - latch.countDown(); - log.warn("Ran {} queries", i); - } - }); - } - - // Wait for all threads to complete or for a failure - boolean completed = latch.await(30, TimeUnit.SECONDS); - assertTrue("Test timed out while waiting for concurrent searches", completed); - assertFalse("Test encountered failures during concurrent searches", failureDetected.get()); - assertEquals("Incorrect number of queries executed", numThreads * queriesPerThread, totalQueries.get()); - - // Log the number of successful queries - log.info("Successfully completed {} concurrent kNN search queries!", totalQueries.get()); - - } finally { - executor.shutdownNow(); + } + + /** + * Tests the functionality and integrity of a Lucene k-NN index under multiple merge cycles and + * verifies the proper ordering of vectors and document identifiers. + * + *

The method performs the following validation steps: 1. Indexes a predefined number of + * documents into a Lucene index, creating many small segments. Each document includes a k-NN + * float vector field encoding a specific order. 2. Executes several merge operations on the index + * (partial and full merges) to validate that the merging process maintains correctness and + * consistency. 3. Validates the following invariants post-merge: (a) Verifies that the index is + * merged into a single segment. (b) Confirms the integrity of vector values by iterating through + * the merged segment and checking the relationship between vector components and document + * identifiers. (c) Performs k-NN searches with various cases: - Single-threaded searches using + * vectors to ensure correct results. - Multi-threaded concurrent searches to confirm robustness + * and verify the index operates correctly under concurrent access without exhausting file handles + * or encountering other issues. + * + *

Assertions are used throughout to ensure the state of the index matches the expected + * behavior, validate merge results, and confirm the accuracy of search operations. The test also + * logs the number of successful k-NN queries during the concurrent search phase. + * + * @throws IOException if an I/O error occurs during index operations. + * @throws InterruptedException if the concurrent search phase is interrupted. + */ + @Test + public void testLuceneKnnIndex_multipleMerges_with_ordering_check() + throws IOException, InterruptedException { + final int numDocs = 10000; + final String floatVectorField = "vec"; + final String expectedDocIdField = "expectedDocId"; + final Path indexPath = createTempDir(); + final float[][] sourceVectors = TestUtils.generateRandomVectors(numDocs, 2); + final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; + + try (Directory dir = newFSDirectory(indexPath)) { + IndexWriterConfig cfg = newIndexWriterConfig(); + cfg.setCodec(getCodec()); + cfg.setUseCompoundFile(false); + cfg.setMergePolicy(new ForceMergesOnlyMergePolicy(false)); + cfg.setMergeScheduler(new SerialMergeScheduler()); + + try (IndexWriter w = new IndexWriter(dir, cfg)) { + /* ---------- 1. index documents, create many tiny segments ---------- */ + for (int i = 0; i < numDocs; i++) { + Document doc = new Document(); + // vector whose first component encodes the future (segment-local) docID + doc.add( + new KnnFloatVectorField( + floatVectorField, sourceVectors[i], vectorSimilarityFunction)); + doc.add(new StoredField(expectedDocIdField, i)); + w.addDocument(doc); + } + w.commit(); + + /* ---------- 2. run several merge cycles ---------- */ + w.forceMerge(5); // partial merge + w.forceMerge(3); // another partial merge + w.forceMerge(1); // final full merge + } + + /* ---------- 3. 
open reader and assert the invariant ---------- */ + try (DirectoryReader reader = DirectoryReader.open(dir)) { + assertEquals("we merged down to exactly one segment", 1, reader.leaves().size()); + + // (a) iterate through vectors directly + for (LeafReaderContext context : reader.leaves()) { + FloatVectorValues vectorValues = context.reader().getFloatVectorValues("vec"); + final var docIdSetIterator = + vectorValues.iterator(); // iterator for all the vectors with values + int docId = -1; + while ((docId = docIdSetIterator.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + final int luceneDocId = context.docBase + docId; + final int globalDocId = + reader + .storedFields() + .document(luceneDocId) + .getField(expectedDocIdField) + .storedValue() + .getIntValue(); + float[] vectorValue = vectorValues.vectorValue(docIdSetIterator.index()); + float[] expectedVectorValue = sourceVectors[globalDocId]; + // if the vectors do not match, also look which source vector should be the right result + if (!Arrays.equals(expectedVectorValue, vectorValue)) { + for (int i = 0; i < sourceVectors.length; i++) { + if (Arrays.equals(sourceVectors[i], vectorValue)) { + log.error( + "found vector with global id: {}, in docId: {}, however the actual position of the vector in source is: {}", + globalDocId, + luceneDocId, + i); } + } } + Assert.assertArrayEquals( + "vector with global id " + + globalDocId + + " in source doesn't match vector value in lucene docID " + + luceneDocId + + " on the index", + expectedVectorValue, + vectorValue, + 0.0f); + } } - } - - private void compareSearchResults( - TopDocs topDocs, - float[][] sourceVectors, - DirectoryReader reader, - String expectedDocIdField, - int baseDocId, - FloatVectorValues vectorValues - ) throws IOException { - // Get the ords matching the lucene doc ids so that we can later find their values in the {@link vectorValues} - final Map docToOrdMap = new HashMap<>(); // docToOrd map - final var docIdSetIterator = vectorValues.iterator(); - while (docIdSetIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { - docToOrdMap.put(docIdSetIterator.docID() + baseDocId, docIdSetIterator.index()); + // (b) search with the same vector and confirm we are not exhausting the file handles with + // each search + IndexSearcher searcher = newSearcher(reader); + LeafReaderContext context = + reader + .leaves() + .get(0); // we only have one leaf at this point so we can use it to obtain the + // vector values + final int baseDocId = context.docBase; + final FloatVectorValues vectorValues = context.reader().getFloatVectorValues("vec"); + final int k = 1; + for (int i = 0; i < reader.maxDoc(); i++) { + float[] query = TestUtils.generateRandomVectors(1, 2)[0]; + TopDocs td = + searcher.search( + getJVectorKnnFloatVectorQuery("vec", query, k, new MatchAllDocsQuery()), k); + assertEquals(k, td.scoreDocs.length); + + compareSearchResults( + td, sourceVectors, reader, expectedDocIdField, baseDocId, vectorValues); } - for (int resultIdx = 0; resultIdx < topDocs.scoreDocs.length; resultIdx++) { - final int localDocId = topDocs.scoreDocs[resultIdx].doc; - final int globalDocId = reader.storedFields().document(localDocId).getField(expectedDocIdField).storedValue().getIntValue(); - - // Access to float values is not thread safe - final float[] vectorValue; - synchronized (vectorValues) { - vectorValue = vectorValues.vectorValue(docToOrdMap.get(localDocId)); - } - float[] expectedVectorValue = sourceVectors[globalDocId]; - Assert.assertArrayEquals("vectors in source and index should match", 
expectedVectorValue, vectorValue, 0.0f); - } - } - - /** - * Test to verify that a document which has been deleted is no longer - * returned in a k-NN search. The index uses the JVector codec and is - * kept in multiple segments to ensure we also cover the case where the - * deleted document still physically resides in the segment as a dead - * (non-live) record. - */ - @Test - public void deletedDocs() throws IOException { - final int totalNumberOfDocs = 100; - final int batchSize = 10; - final int k = batchSize - 1; - final int docToDeleteInEachBatch = 5; - final Path indexPath = createTempDir(); - final IndexWriterConfig iwc = newIndexWriterConfig(); - // JVector codec requires compound files to be disabled at the moment - iwc.setUseCompoundFile(false); - iwc.setCodec(getCodec()); - iwc.setMergePolicy(new ForceMergesOnlyMergePolicy(false)); - - try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter writer = new IndexWriter(dir, iwc)) { - - /* - * 1. Index 100 docs, in batches of 10. Delete the 5th doc in each batch. - * will leave us with 10 segments, each with 9 live docs. - */ - int batchNumber = 0; - for (int i = 1; i <= totalNumberOfDocs; i++) { - Document doc = new Document(); - final float[] vector = { 0.0f, 1.0f * (i + batchNumber) }; - doc.add(new StringField("docId", Integer.toString(i + 1), Field.Store.YES)); - doc.add(new KnnFloatVectorField("test_field", vector, VectorSimilarityFunction.EUCLIDEAN)); - writer.addDocument(doc); - if (i % batchSize == 0) { - writer.flush(); - writer.deleteDocuments(new TermQuery(new Term("docId", Integer.toString(i - docToDeleteInEachBatch)))); - batchNumber++; - } - } - writer.commit(); - - /* ---------------------------------------- - * 2. Merge all segments into one - * ---------------------------------------- */ - writer.forceMerge(1); - - /* ---------------------------------------- - * 3. 
Search – the deleted doc must be gone - * ---------------------------------------- */ - try (IndexReader reader = DirectoryReader.open(writer)) { - assertEquals( - "All documents except the deleted ones should be live", - totalNumberOfDocs - (totalNumberOfDocs / batchSize), - reader.numDocs() - ); - // For each batch we will verify that the deleted document doesn't come up in search and only it's neighbours are returned - - for (int i = 0; i < totalNumberOfDocs; i += batchSize) { - final float[] target = { 0.0f, 1.0f * (i + docToDeleteInEachBatch) }; - final IndexSearcher searcher = newSearcher(reader); - final KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery( - "test_field", - target, - k, - new MatchAllDocsQuery() - ); - TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); - assertEquals(k, topDocs.totalHits.value()); - for (int j = 0; j < k; j++) { - Document doc = reader.storedFields().document(topDocs.scoreDocs[j].doc); - int docId = Integer.parseInt(doc.get("docId")); - assertNotEquals("Deleted doc should not be returned in search results", i + docToDeleteInEachBatch, docId); + // (c) search with the same vector and this time add concurrency to make sure we are still + // not exhausting the file handles + int numThreads = 10; // Number of concurrent search threads + int queriesPerThread = 100; // Number of searches per thread + ExecutorService executor = Executors.newFixedThreadPool(numThreads); + CountDownLatch latch = new CountDownLatch(numThreads); + AtomicBoolean failureDetected = new AtomicBoolean(false); + AtomicInteger totalQueries = new AtomicInteger(0); + + try { + for (int t = 0; t < numThreads; t++) { + executor.submit( + () -> { + int i = 0; + + try { + for (i = 0; i < queriesPerThread && !failureDetected.get(); i++) { + float[] query = TestUtils.generateRandomVectors(1, 2)[0]; + try { + TopDocs td = searcher.search(new KnnFloatVectorQuery("vec", query, k), k); + assertEquals( + "Search should return correct number of results", + k, + td.scoreDocs.length); + compareSearchResults( + td, sourceVectors, reader, expectedDocIdField, baseDocId, vectorValues); + totalQueries.incrementAndGet(); + } catch (Throwable e) { + failureDetected.compareAndSet(false, true); + log.error("Exception encountered", e); + fail("Exception during concurrent search: " + e.getMessage()); + } } - } - } + } finally { + latch.countDown(); + log.warn("Ran {} queries", i); + } + }); + } + + // Wait for all threads to complete or for a failure + boolean completed = latch.await(30, TimeUnit.SECONDS); + assertTrue("Test timed out while waiting for concurrent searches", completed); + assertFalse( + "Test encountered failures during concurrent searches", failureDetected.get()); + assertEquals( + "Incorrect number of queries executed", + numThreads * queriesPerThread, + totalQueries.get()); + + // Log the number of successful queries + log.info("Successfully completed {} concurrent kNN search queries!", totalQueries.get()); + + } finally { + executor.shutdownNow(); } + } } - - /** - * Test to verify that the Lucene codec is able to successfully search for the nearest neighbours - * in the index. - * Single field is used to store the vectors. - * Documents are stored in potentially multiple segments. - * Multiple commits. - * Multiple merges. - * Merge is enabled. - * compound file is enabled. 
- */ - @Test - public void testLuceneKnnIndex_mergeEnabled_withCompoundFile() throws IOException { - int k = 3; // The number of nearest neighbors to gather - int totalNumberOfDocs = 10; - IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); - indexWriterConfig.setUseCompoundFile(true); - indexWriterConfig.setCodec(getCodec()); - indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(true)); - indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); - final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); - try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { - final float[] target = new float[] { 0.0f, 0.0f }; - for (int i = 1; i < totalNumberOfDocs + 1; i++) { - final float[] source = new float[] { 0.0f, 1.0f / i }; - final Document doc = new Document(); - doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); - w.addDocument(doc); - w.flush(); // this creates a new segment without triggering a merge - } - log.info("Done writing all files to the file system"); - - w.forceMerge(1); // this merges all segments into a single segment - log.info("Done merging all segments"); - try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have 1 segment with 10 documents"); - Assert.assertEquals(1, reader.getContext().leaves().size()); - Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); - final Query filterQuery = new MatchAllDocsQuery(); - final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); - TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); - assertEquals(k, topDocs.totalHits.value()); - assertEquals(9, topDocs.scoreDocs[0].doc); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 10.0f }), - topDocs.scoreDocs[0].score, - 0.01f - ); - assertEquals(8, topDocs.scoreDocs[1].doc); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 9.0f }), - topDocs.scoreDocs[1].score, - 0.01f - ); - assertEquals(7, topDocs.scoreDocs[2].doc); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 8.0f }), - topDocs.scoreDocs[2].score, - 0.01f - ); - log.info("successfully completed search tests"); - } - } + } + + private void compareSearchResults( + TopDocs topDocs, + float[][] sourceVectors, + DirectoryReader reader, + String expectedDocIdField, + int baseDocId, + FloatVectorValues vectorValues) + throws IOException { + // Get the ords matching the lucene doc ids so that we can later find their values in the {@link + // vectorValues} + final Map docToOrdMap = new HashMap<>(); // docToOrd map + final var docIdSetIterator = vectorValues.iterator(); + while (docIdSetIterator.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + docToOrdMap.put(docIdSetIterator.docID() + baseDocId, docIdSetIterator.index()); } - /** - * Test to verify that the Lucene codec is able to successfully search for the nearest neighbours - * in the index. - * Single field is used to store the vectors. - * Documents are stored in potentially multiple segments. - * Multiple commits. - * Multiple merges. - * Merge is enabled. - * compound file is enabled. - * cosine similarity is used. 
- */ - @Test - public void testLuceneKnnIndex_mergeEnabled_withCompoundFile_cosine() throws IOException { - int k = 3; // The number of nearest neighbours to gather - int totalNumberOfDocs = 10; - IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); - indexWriterConfig.setUseCompoundFile(true); - indexWriterConfig.setCodec(getCodec()); - indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(true)); - indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); - final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); - try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { - final float[] target = new float[] { 1.0f, 1.0f }; - for (int i = 1; i < totalNumberOfDocs + 1; i++) { - final float[] source = new float[] { 1.0f + i, 2.0f * i }; - final Document doc = new Document(); - doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.COSINE)); - w.addDocument(doc); - w.flush(); // this creates a new segment without triggering a merge - } - log.info("Done writing all files to the file system"); - - w.forceMerge(1); // this merges all segments into a single segment - log.info("Done merging all segments"); - try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have 1 segment with 10 documents"); - Assert.assertEquals(1, reader.getContext().leaves().size()); - Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); - final Query filterQuery = new MatchAllDocsQuery(); - final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); - TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); - assertEquals(k, topDocs.totalHits.value()); - assertEquals(0, topDocs.scoreDocs[0].doc); - Assert.assertEquals( - VectorSimilarityFunction.COSINE.compare(target, new float[] { 2.0f, 2.0f }), - topDocs.scoreDocs[0].score, - 0.001f - ); - assertEquals(1, topDocs.scoreDocs[1].doc); - Assert.assertEquals( - VectorSimilarityFunction.COSINE.compare(target, new float[] { 3.0f, 4.0f }), - topDocs.scoreDocs[1].score, - 0.001f - ); - assertEquals(2, topDocs.scoreDocs[2].doc); - Assert.assertEquals( - VectorSimilarityFunction.COSINE.compare(target, new float[] { 4.0f, 6.0f }), - topDocs.scoreDocs[2].score, - 0.001f - ); - log.info("successfully completed search tests"); - } - } + for (int resultIdx = 0; resultIdx < topDocs.scoreDocs.length; resultIdx++) { + final int localDocId = topDocs.scoreDocs[resultIdx].doc; + final int globalDocId = + reader + .storedFields() + .document(localDocId) + .getField(expectedDocIdField) + .storedValue() + .getIntValue(); + + // Access to float values is not thread safe + final float[] vectorValue; + synchronized (vectorValues) { + vectorValue = vectorValues.vectorValue(docToOrdMap.get(localDocId)); + } + float[] expectedVectorValue = sourceVectors[globalDocId]; + Assert.assertArrayEquals( + "vectors in source and index should match", expectedVectorValue, vectorValue, 0.0f); } - - /** - * Test to verify that the JVector codec is providing proper error if used with byte vector - * TODO: Create Binary Quantization support for JVector codec - */ - @Test - public void testJVectorKnnIndex_simpleCase_withBinaryVector() throws IOException { - int k = 3; // The number of nearest neighbours to gather - int totalNumberOfDocs = 10; - IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); - // TODO: re-enable 
this after fixing the compound file augmentation for JVector - indexWriterConfig.setUseCompoundFile(false); - indexWriterConfig.setCodec(getCodec()); - indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); - final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); - try (Directory dir = newFSDirectory(indexPath); RandomIndexWriter w = new RandomIndexWriter(random(), dir, indexWriterConfig)) { - final byte[] source = new byte[] { (byte) 0, (byte) 0 }; - final Document doc = new Document(); - doc.add(new KnnByteVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); - Assert.assertThrows(UnsupportedOperationException.class, () -> w.addDocument(doc)); + } + + /** + * Test to verify that a document which has been deleted is no longer returned in a k-NN search. + * The index uses the JVector codec and is kept in multiple segments to ensure we also cover the + * case where the deleted document still physically resides in the segment as a dead (non-live) + * record. + */ + @Test + public void deletedDocs() throws IOException { + final int totalNumberOfDocs = 100; + final int batchSize = 10; + final int k = batchSize - 1; + final int docToDeleteInEachBatch = 5; + final Path indexPath = createTempDir(); + final IndexWriterConfig iwc = newIndexWriterConfig(); + // JVector codec requires compound files to be disabled at the moment + iwc.setUseCompoundFile(false); + iwc.setCodec(getCodec()); + iwc.setMergePolicy(new ForceMergesOnlyMergePolicy(false)); + + try (FSDirectory dir = FSDirectory.open(indexPath); + IndexWriter writer = new IndexWriter(dir, iwc)) { + + /* + * 1. Index 100 docs, in batches of 10. Delete the 5th doc in each batch. + * will leave us with 10 segments, each with 9 live docs. + */ + int batchNumber = 0; + for (int i = 1; i <= totalNumberOfDocs; i++) { + Document doc = new Document(); + final float[] vector = {0.0f, 1.0f * (i + batchNumber)}; + doc.add(new StringField("docId", Integer.toString(i + 1), Field.Store.YES)); + doc.add(new KnnFloatVectorField("test_field", vector, VectorSimilarityFunction.EUCLIDEAN)); + writer.addDocument(doc); + if (i % batchSize == 0) { + writer.flush(); + writer.deleteDocuments( + new TermQuery(new Term("docId", Integer.toString(i - docToDeleteInEachBatch)))); + batchNumber++; } - } - - /** - * Test to verify that the JVector codec is able to successfully search for the nearest neighbours - * in the index with a filter applied. - */ - @Test - public void testJVectorKnnIndex_withFilter() throws IOException { - int k = 3; // The number of nearest neighbours to gather - int totalNumberOfDocs = 10; - IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); - indexWriterConfig.setUseCompoundFile(false); - indexWriterConfig.setCodec(getCodec()); - indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); - final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); - try (Directory dir = newFSDirectory(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { - final float[] target = new float[] { 0.0f, 0.0f }; - for (int i = 1; i < totalNumberOfDocs + 1; i++) { - final float[] source = new float[] { 0.0f, 1.0f / i }; - final Document doc = new Document(); - doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); - doc.add(new StringField("filter_field", i % 2 == 0 ? 
"even" : "odd", Field.Store.YES)); - w.addDocument(doc); - } - log.info("Flushing docs to make them discoverable on the file system"); - w.commit(); - - try (IndexReader reader = DirectoryReader.open(w)) { - log.info("Applying filter to the KNN search"); - final Query filterQuery = new TermQuery(new Term("filter_field", "even")); - final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); - TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); - - log.info("Validating filtered KNN results"); - assertEquals(k, topDocs.totalHits.value()); - assertEquals(9, topDocs.scoreDocs[0].doc); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 10.0f }), - topDocs.scoreDocs[0].score, - 0.001f - ); - assertEquals(7, topDocs.scoreDocs[1].doc); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 8.0f }), - topDocs.scoreDocs[1].score, - 0.001f - ); - assertEquals(5, topDocs.scoreDocs[2].doc); - Assert.assertEquals( - VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] { 0.0f, 1.0f / 6.0f }), - topDocs.scoreDocs[2].score, - 0.001f - ); - log.info("successfully completed filtered search tests"); - } + } + writer.commit(); + + /* ---------------------------------------- + * 2. Merge all segments into one + * ---------------------------------------- */ + writer.forceMerge(1); + + /* ---------------------------------------- + * 3. Search – the deleted doc must be gone + * ---------------------------------------- */ + try (IndexReader reader = DirectoryReader.open(writer)) { + assertEquals( + "All documents except the deleted ones should be live", + totalNumberOfDocs - (totalNumberOfDocs / batchSize), + reader.numDocs()); + // For each batch we will verify that the deleted document doesn't come up in search and + // only it's neighbours are returned + + for (int i = 0; i < totalNumberOfDocs; i += batchSize) { + final float[] target = {0.0f, 1.0f * (i + docToDeleteInEachBatch)}; + final IndexSearcher searcher = newSearcher(reader); + final KnnFloatVectorQuery knnFloatVectorQuery = + getJVectorKnnFloatVectorQuery("test_field", target, k, new MatchAllDocsQuery()); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + for (int j = 0; j < k; j++) { + Document doc = reader.storedFields().document(topDocs.scoreDocs[j].doc); + int docId = Integer.parseInt(doc.get("docId")); + assertNotEquals( + "Deleted doc should not be returned in search results", + i + docToDeleteInEachBatch, + docId); + } } + } } - - /** - * Test the simple case of quantization where we have the perfect batch single batch size with no merges or too small batch sizes - */ - @Test - public void testJVectorKnnIndex_simpleCase_withQuantization() throws IOException { - int k = 50; // The number of nearest neighbours to gather - int dimension = 16; - int totalNumberOfDocs = DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; - final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; - - IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); - indexWriterConfig.setUseCompoundFile(false); - indexWriterConfig.setCodec(getCodec(DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION)); - indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); - // We set the below parameters to make sure no permature flush will occur, this way we can 
have a single segment, and we can force - // test the quantization case - indexWriterConfig.setMaxBufferedDocs(10000); // force flush every 10000 docs, this way we make sure that we only have a single - // segment for a totalNumberOfDocs < 1000 - indexWriterConfig.setRAMPerThreadHardLimitMB(1000); // 1000MB per thread, this way we make sure that no premature flush will occur - final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); - try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { - final float[] target = generateZerosVectorWithLastValue(dimension, 0); - final float[][] vectors = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); - final Set groundTruthVectorsIds = calculateGroundTruthVectorsIds(target, vectors, k, vectorSimilarityFunction); - for (int i = 0; i < vectors.length; i++) { - final Document doc = new Document(); - doc.add(new KnnFloatVectorField(TEST_FIELD, vectors[i], vectorSimilarityFunction)); - doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); - w.addDocument(doc); - } - log.info("Flushing docs to make them discoverable on the file system"); - w.commit(); - - try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have a single segment with {} documents", totalNumberOfDocs); - Assert.assertEquals(1, reader.getContext().leaves().size()); - Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); - - final Query filterQuery = new MatchAllDocsQuery(); - final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery(TEST_FIELD, target, k, filterQuery); - TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); - assertEquals(k, topDocs.totalHits.value()); - final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); - Assert.assertEquals(1.0f, recall, 0.05f); - log.info("successfully completed search tests"); - } - } + } + + /** + * Test to verify that the Lucene codec is able to successfully search for the nearest neighbours + * in the index. Single field is used to store the vectors. Documents are stored in potentially + * multiple segments. Multiple commits. Multiple merges. Merge is enabled. compound file is + * enabled. 
+ */ + @Test + public void testLuceneKnnIndex_mergeEnabled_withCompoundFile() throws IOException { + int k = 3; // The number of nearest neighbors to gather + int totalNumberOfDocs = 10; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(true); + indexWriterConfig.setCodec(getCodec()); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(true)); + indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); + IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = new float[] {0.0f, 0.0f}; + for (int i = 1; i < totalNumberOfDocs + 1; i++) { + final float[] source = new float[] {0.0f, 1.0f / i}; + final Document doc = new Document(); + doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); + w.addDocument(doc); + w.flush(); // this creates a new segment without triggering a merge + } + log.info("Done writing all files to the file system"); + + w.forceMerge(1); // this merges all segments into a single segment + log.info("Done merging all segments"); + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have 1 segment with 10 documents"); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = + getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + assertEquals(9, topDocs.scoreDocs[0].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 1.0f / 10.0f}), + topDocs.scoreDocs[0].score, + 0.01f); + assertEquals(8, topDocs.scoreDocs[1].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 1.0f / 9.0f}), + topDocs.scoreDocs[1].score, + 0.01f); + assertEquals(7, topDocs.scoreDocs[2].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 1.0f / 8.0f}), + topDocs.scoreDocs[2].score, + 0.01f); + log.info("successfully completed search tests"); + } } - - /** - * Test recall with different types of rerank parameters - */ - @Test - public void testJVectorKnnIndex_simpleCase_withQuantization_rerank() throws IOException { - int k = 1; // The number of nearest neighbours to gather - int dimension = 16; - int totalNumberOfDocs = DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; - IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); - indexWriterConfig.setUseCompoundFile(false); - indexWriterConfig.setCodec(getCodec(DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION)); - indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); - // We set the below parameters to make sure no permature flush will occur, this way we can have a single segment, and we can force - // test the quantization case - indexWriterConfig.setMaxBufferedDocs(10000); // force flush every 10000 docs, this way we make sure that we only have a single - // segment for a totalNumberOfDocs < 1000 - indexWriterConfig.setRAMPerThreadHardLimitMB(1000); // 1000MB per thread, this way we make sure that no premature flush will occur - final Path 
indexPath = createTempDir(); - log.info("Index path: {}", indexPath); - try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { - final float[] target = generateZerosVectorWithLastValue(dimension, 0); - for (int i = 1; i < totalNumberOfDocs + 1; i++) { - final float[] source = generateZerosVectorWithLastValue(dimension, i); - final Document doc = new Document(); - doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); - w.addDocument(doc); - } - log.info("Flushing docs to make them discoverable on the file system"); - w.commit(); - - try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have a single segment with {} documents", totalNumberOfDocs); - Assert.assertEquals(1, reader.getContext().leaves().size()); - Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); - - final Query filterQuery = new MatchAllDocsQuery(); - final IndexSearcher searcher = newSearcher(reader); - float expectedMinScoreInTopK = VectorSimilarityFunction.EUCLIDEAN.compare( - target, - new float[] { 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, k } - ); - - // Query with essentially no reranking and expect recall to be very low - KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery, 1); - TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); - assertEquals(k, topDocs.totalHits.value()); - - final float recallWithLowOverqueryFactor = calculateRecall(topDocs, expectedMinScoreInTopK); - - // Query with reranking and expect recall to be high - knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery, 5); - topDocs = searcher.search(knnFloatVectorQuery, k); - assertEquals(k, topDocs.totalHits.value()); - float recallWithHighOverqueryFactor = calculateRecall(topDocs, expectedMinScoreInTopK); - Assert.assertTrue(recallWithLowOverqueryFactor <= recallWithHighOverqueryFactor); - - log.info("successfully completed search tests"); - } - } + } + + /** + * Test to verify that the Lucene codec is able to successfully search for the nearest neighbours + * in the index. Single field is used to store the vectors. Documents are stored in potentially + * multiple segments. Multiple commits. Multiple merges. Merge is enabled. compound file is + * enabled. cosine similarity is used. 
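+   *
+   * <p>For illustration only (assumed usage, mirroring the test body below): a vector is indexed
+   * with the cosine metric by constructing the field with {@link VectorSimilarityFunction#COSINE}:
+   *
+   * <pre>{@code
+   * Document doc = new Document();
+   * doc.add(new KnnFloatVectorField(
+   *     "test_field", new float[] {3.0f, 4.0f}, VectorSimilarityFunction.COSINE));
+   * writer.addDocument(doc);
+   * }</pre>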
+ */ + @Test + public void testLuceneKnnIndex_mergeEnabled_withCompoundFile_cosine() throws IOException { + int k = 3; // The number of nearest neighbours to gather + int totalNumberOfDocs = 10; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(true); + indexWriterConfig.setCodec(getCodec()); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(true)); + indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); + IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = new float[] {1.0f, 1.0f}; + for (int i = 1; i < totalNumberOfDocs + 1; i++) { + final float[] source = new float[] {1.0f + i, 2.0f * i}; + final Document doc = new Document(); + doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.COSINE)); + w.addDocument(doc); + w.flush(); // this creates a new segment without triggering a merge + } + log.info("Done writing all files to the file system"); + + w.forceMerge(1); // this merges all segments into a single segment + log.info("Done merging all segments"); + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have 1 segment with 10 documents"); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = + getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + assertEquals(0, topDocs.scoreDocs[0].doc); + Assert.assertEquals( + VectorSimilarityFunction.COSINE.compare(target, new float[] {2.0f, 2.0f}), + topDocs.scoreDocs[0].score, + 0.001f); + assertEquals(1, topDocs.scoreDocs[1].doc); + Assert.assertEquals( + VectorSimilarityFunction.COSINE.compare(target, new float[] {3.0f, 4.0f}), + topDocs.scoreDocs[1].score, + 0.001f); + assertEquals(2, topDocs.scoreDocs[2].doc); + Assert.assertEquals( + VectorSimilarityFunction.COSINE.compare(target, new float[] {4.0f, 6.0f}), + topDocs.scoreDocs[2].score, + 0.001f); + log.info("successfully completed search tests"); + } } - - /** - * Test the simple case of quantization where we have the perfect batch single batch size each time with a merge of - * multiple segments - */ - @Test - public void testJVectorKnnIndex_happyCase_withQuantization_multipleSegments() throws IOException { - final int dimension = 16; - final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; - final int k = 50; // The number of nearest neighbours to gather, we set a high number here to avoid an inaccurate result and - // jittery tests - final int perfectBatchSize = DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; // MINIMUM_BATCH_SIZE_FOR_QUANTIZATION is the minimal - // batch size that will trigger a quantization without - // breaking it, generally speaking the batch size can't be - // lower than the number of clusters - final int totalNumberOfDocs = perfectBatchSize * 2; - - IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); - indexWriterConfig.setUseCompoundFile(false); - indexWriterConfig.setCodec(getCodec(DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION)); - 
indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); - // We set the below parameters to make sure no permature flush will occur, this way we can have a single segment, and we can force - // test the quantization case - indexWriterConfig.setMaxBufferedDocs(10000); // force flush every 10000 docs, this way we make sure that we only have a single - // segment for a totalNumberOfDocs < 1000 - indexWriterConfig.setRAMPerThreadHardLimitMB(1000); // 1000MB per thread, this way we make sure that no premature flush will occur - final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); - try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { - final float[] target = generateZerosVectorWithLastValue(dimension, 0); - final float[][] vectors = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); - final Set groundTruthVectorsIds = calculateGroundTruthVectorsIds(target, vectors, k, vectorSimilarityFunction); - - for (int i = 0; i < vectors.length; i++) { - final Document doc = new Document(); - doc.add(new KnnFloatVectorField(TEST_FIELD, vectors[i], vectorSimilarityFunction)); - doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); - w.addDocument(doc); - if (i % perfectBatchSize == 0) { - w.commit(); - } - } - log.info("Flushing docs to make them discoverable on the file system"); - w.forceMerge(1); - - try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have a single segment with {} documents", totalNumberOfDocs); - Assert.assertEquals(1, reader.getContext().leaves().size()); - Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); - - final Query filterQuery = new MatchAllDocsQuery(); - final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); - TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); - assertEquals(k, topDocs.totalHits.value()); - final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); - Assert.assertEquals(1.0f, recall, 0.05f); - log.info("successfully completed search tests"); - } - } + } + + /** + * Test to verify that the JVector codec is providing proper error if used with byte vector TODO: + * Create Binary Quantization support for JVector codec + */ + @Test + public void testJVectorKnnIndex_simpleCase_withBinaryVector() throws IOException { + int k = 3; // The number of nearest neighbours to gather + int totalNumberOfDocs = 10; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + // TODO: re-enable this after fixing the compound file augmentation for JVector + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec()); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (Directory dir = newFSDirectory(indexPath); + RandomIndexWriter w = new RandomIndexWriter(random(), dir, indexWriterConfig)) { + final byte[] source = new byte[] {(byte) 0, (byte) 0}; + final Document doc = new Document(); + doc.add(new KnnByteVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); + Assert.assertThrows(UnsupportedOperationException.class, () -> w.addDocument(doc)); } - - /** - * Test the non-ideal case where batch sizes are not perfect and are lower than the number of recommended clusters in the index - * The expected behavior is for the quantization 
to only kick in when we have a merge or batch size that is bigger than the minimal required batch size - */ - @Test - public void testJVectorKnnIndex_mixedBatchSizes_withQuantization_multipleMerges() throws IOException { - final int k = 50; // The number of nearest neighbours to gather, we set a high number here to avoid an inaccurate result and - // jittery tests - final int dimension = 16; - final int notIdealBatchSize = DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION / 3; // Batch size that is not ideal for quantization and - // shouldn't trigger it - final int totalNumberOfDocs = notIdealBatchSize * 3; // 3 batches of documents each will result in quantization only when the merge - // is triggered, and we have a batch size of {@link - // MINIMUM_BATCH_SIZE_FOR_QUANTIZATION} as a result of merging all the smaller - // batches - final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; - IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); - indexWriterConfig.setUseCompoundFile(false); - indexWriterConfig.setCodec(getCodec(DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION)); - indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); - // We set the below parameters to make sure no permature flush will occur, this way we can have a single segment, and we can force - // test the quantization case - indexWriterConfig.setMaxBufferedDocs(10000); // force flush every 10000 docs, this way we make sure that we only have a single - // segment for a totalNumberOfDocs < 1000 - indexWriterConfig.setRAMPerThreadHardLimitMB(1000); // 1000MB per thread, this way we make sure that no premature flush will occur - final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); - try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { - final float[] target = generateZerosVectorWithLastValue(dimension, 0); - final float[][] vectors = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); - final Set groundTruthVectorsIds = calculateGroundTruthVectorsIds(target, vectors, k, vectorSimilarityFunction); - for (int i = 0; i < totalNumberOfDocs; i++) { - final float[] source = vectors[i]; - final Document doc = new Document(); - doc.add(new KnnFloatVectorField("test_field", source, vectorSimilarityFunction)); - doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); - w.addDocument(doc); - if (i % notIdealBatchSize == 0) { - w.commit(); - } - } - log.info("Flushing docs to make them discoverable on the file system"); - w.forceMerge(1); - - try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have a single segment with {} documents", totalNumberOfDocs); - Assert.assertEquals(1, reader.getContext().leaves().size()); - Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); - - final Query filterQuery = new MatchAllDocsQuery(); - final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); - TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); - assertEquals(k, topDocs.totalHits.value()); - final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); - Assert.assertEquals(1.0f, recall, 0.05f); - log.info("successfully completed search tests"); - } - } + } + + /** + * Test to verify that the JVector codec is able to successfully search for the nearest neighbours + * in the index with a filter applied. 
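+   *
+   * <p>Illustrative sketch (not part of the assertions): the filter is an ordinary Lucene query
+   * passed alongside the KNN parameters, e.g.
+   *
+   * <pre>{@code
+   * Query filter = new TermQuery(new Term("filter_field", "even"));
+   * KnnFloatVectorQuery query = new KnnFloatVectorQuery("test_field", target, k, filter);
+   * TopDocs hits = searcher.search(query, k);
+   * }</pre>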
+ */ + @Test + public void testJVectorKnnIndex_withFilter() throws IOException { + int k = 3; // The number of nearest neighbours to gather + int totalNumberOfDocs = 10; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec()); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (Directory dir = newFSDirectory(indexPath); + IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = new float[] {0.0f, 0.0f}; + for (int i = 1; i < totalNumberOfDocs + 1; i++) { + final float[] source = new float[] {0.0f, 1.0f / i}; + final Document doc = new Document(); + doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); + doc.add(new StringField("filter_field", i % 2 == 0 ? "even" : "odd", Field.Store.YES)); + w.addDocument(doc); + } + log.info("Flushing docs to make them discoverable on the file system"); + w.commit(); + + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("Applying filter to the KNN search"); + final Query filterQuery = new TermQuery(new Term("filter_field", "even")); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = + getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + + log.info("Validating filtered KNN results"); + assertEquals(k, topDocs.totalHits.value()); + assertEquals(9, topDocs.scoreDocs[0].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 1.0f / 10.0f}), + topDocs.scoreDocs[0].score, + 0.001f); + assertEquals(7, topDocs.scoreDocs[1].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 1.0f / 8.0f}), + topDocs.scoreDocs[1].score, + 0.001f); + assertEquals(5, topDocs.scoreDocs[2].doc); + Assert.assertEquals( + VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 1.0f / 6.0f}), + topDocs.scoreDocs[2].score, + 0.001f); + log.info("successfully completed filtered search tests"); + } } - - /** - * Test the non-ideal case where batch sizes are not perfect and are lower than the number of recommended clusters in the index - * The expected behavior is for the quantization to only kick in when we have a merge or batch size that is bigger than the minimal required batch size - * Also this is adding the compound file to the mix - */ - @Test - public void testJVectorKnnIndex_mixedBatchSizes_withQuantization_multipleMerges_withCompoundFile() throws IOException { - final int k = 50; // The number of nearest neighbours to gather, we set a high number here to avoid an inaccurate result and - // jittery tests - final int dimension = 16; - final int notIdealBatchSize = DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION / 3; // Batch size that is not ideal for quantization and - // shouldn't trigger it - final int totalNumberOfDocs = notIdealBatchSize * 10; // 3 batches of documents each will result in quantization only when the merge - // is triggered, and we have a batch size of {@link MINIMUM_BATCH_SIZE_FOR_QUANTIZATION} - // as a result of merging all the smaller batches - final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; - - boolean useCompoundFile = true; - IndexWriterConfig indexWriterConfig = 
LuceneTestCase.newIndexWriterConfig(); - indexWriterConfig.setUseCompoundFile(useCompoundFile); - indexWriterConfig.setCodec(getCodec(DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION)); - indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(useCompoundFile)); - // We set the below parameters to make sure no premature flush will occur, this way we can have a single segment, and we can force - // test the quantization case - indexWriterConfig.setMaxBufferedDocs(10000); // force flush every 10000 docs, this way we make sure that we only have a single - // segment for a totalNumberOfDocs < 1000 - indexWriterConfig.setRAMPerThreadHardLimitMB(1000); // 1000MB per thread, this way we make sure that no premature flush will occur - final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); - try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { - final float[] target = generateZerosVectorWithLastValue(dimension, 0); - // We will use random vectors because otherwise PQ will have a correlated subspaces which will result in a broken linear graph - final float[][] vectors = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); - final Set groundTruthVectorsIds = calculateGroundTruthVectorsIds(target, vectors, k, vectorSimilarityFunction); - for (int i = 0; i < totalNumberOfDocs; i++) { - final float[] source = vectors[i]; - final Document doc = new Document(); - doc.add(new KnnFloatVectorField(TEST_FIELD, source, vectorSimilarityFunction)); - doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); - w.addDocument(doc); - if (i % notIdealBatchSize == 0) { - w.commit(); - } - } - w.commit(); - log.info("Flushing docs to make them discoverable on the file system"); - w.forceMerge(1); - - try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have a single segment with {} documents", totalNumberOfDocs); - Assert.assertEquals(1, reader.getContext().leaves().size()); - Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); - - final Query filterQuery = new MatchAllDocsQuery(); - final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery, 1000); - TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); - assertEquals(k, topDocs.totalHits.value()); - final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); - Assert.assertEquals("Expected to have recall of 1.0+/-0.05 but got " + recall, 1.0f, recall, 0.05f); - log.info("successfully completed search tests"); - } - } - - Assert.assertTrue("No quantization time recorded", KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.getCount() > 0); - Assert.assertTrue("No graph merge time recorded", KNNCounter.KNN_GRAPH_MERGE_TIME.getCount() > 0); + } + + /** + * Test the simple case of quantization where we have the perfect batch single batch size with no + * merges or too small batch sizes + */ + @Test + public void testJVectorKnnIndex_simpleCase_withQuantization() throws IOException { + int k = 50; // The number of nearest neighbours to gather + int dimension = 16; + int totalNumberOfDocs = DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; + final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; + + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + 
indexWriterConfig.setCodec(getCodec(DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION)); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + // We set the below parameters to make sure no permature flush will occur, this way we can have + // a single segment, and we can force + // test the quantization case + indexWriterConfig.setMaxBufferedDocs( + 10000); // force flush every 10000 docs, this way we make sure that we only have a single + // segment for a totalNumberOfDocs < 1000 + indexWriterConfig.setRAMPerThreadHardLimitMB( + 1000); // 1000MB per thread, this way we make sure that no premature flush will occur + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); + IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = generateZerosVectorWithLastValue(dimension, 0); + final float[][] vectors = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); + final Set groundTruthVectorsIds = + calculateGroundTruthVectorsIds(target, vectors, k, vectorSimilarityFunction); + for (int i = 0; i < vectors.length; i++) { + final Document doc = new Document(); + doc.add(new KnnFloatVectorField(TEST_FIELD, vectors[i], vectorSimilarityFunction)); + doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); + w.addDocument(doc); + } + log.info("Flushing docs to make them discoverable on the file system"); + w.commit(); + + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have a single segment with {} documents", totalNumberOfDocs); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = + getJVectorKnnFloatVectorQuery(TEST_FIELD, target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); + Assert.assertEquals(1.0f, recall, 0.05f); + log.info("successfully completed search tests"); + } } - - /** - * We will use multiple batches, each can trigger a quantization and later merge them in an appending order to keep track - * of refinement - * @throws IOException - */ - @Test - public void testJVectorKnnIndex_withQuantization_withCompoundFile_with_refinement() throws IOException { - final int k = 50; // The number of nearest neighbours to gather, we set a high number here to avoid an inaccurate result and - // jittery tests - final int dimension = 16; - final int idealBatchSize = DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; // Batch size that is not ideal for quantization and - // shouldn't trigger it - final int totalNumberOfDocs = idealBatchSize * 10; // 10 batches, each batch on it's own will trigger quantization - final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; - - boolean useCompoundFile = true; - IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); - indexWriterConfig.setUseCompoundFile(useCompoundFile); - indexWriterConfig.setCodec(getCodec(DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION)); - indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(useCompoundFile)); - // We set the below parameters to make sure no premature flush will occur, this way we can have a single segment, and we can force - // 
test the quantization case - indexWriterConfig.setMaxBufferedDocs(10000); // force flush every 10000 docs, this way we make sure that we only have a single - // segment for a totalNumberOfDocs < 1000 - indexWriterConfig.setRAMPerThreadHardLimitMB(1000); // 1000MB per thread, this way we make sure that no premature flush will occur - final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); - try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { - final float[] target = generateZerosVectorWithLastValue(dimension, 0); - // We will use random vectors because otherwise PQ will have a correlated subspaces which will result in a broken linear graph - final float[][] vectors = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); - final Set groundTruthVectorsIds = calculateGroundTruthVectorsIds(target, vectors, k, vectorSimilarityFunction); - for (int i = 0; i < totalNumberOfDocs; i++) { - final float[] source = vectors[i]; - final Document doc = new Document(); - doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); - doc.add(new KnnFloatVectorField(TEST_FIELD, source, vectorSimilarityFunction)); - w.addDocument(doc); - if (i % idealBatchSize == 0) { - final long beforeTrainingTime = KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.getCount(); - w.commit(); - w.forceMerge(1); // force merge will trigger PQ refinement if other segments are present - final long afterTrainingTime = KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.getCount(); - Assert.assertTrue( - "Expected to have a training time of at least " + beforeTrainingTime + " but got " + afterTrainingTime, - afterTrainingTime >= beforeTrainingTime - ); - } - } - w.commit(); - log.info("Flushing docs to make them discoverable on the file system"); - w.forceMerge(1); - - try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have a single segment with {} documents", totalNumberOfDocs); - Assert.assertEquals(1, reader.getContext().leaves().size()); - Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); - - final Query filterQuery = new MatchAllDocsQuery(); - final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery, 1000); - TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); - assertEquals(k, topDocs.totalHits.value()); - final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); - Assert.assertEquals("Expected to have recall of 1.0+/-0.05 but got " + recall, 1.0f, recall, 0.05f); - log.info("successfully completed search tests"); - } + } + + /** Test recall with different types of rerank parameters */ + @Test + public void testJVectorKnnIndex_simpleCase_withQuantization_rerank() throws IOException { + int k = 1; // The number of nearest neighbours to gather + int dimension = 16; + int totalNumberOfDocs = DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec(DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION)); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + // We set the below parameters to make sure no permature flush will occur, this way we can have + // a single segment, and we can force + // test the quantization case + indexWriterConfig.setMaxBufferedDocs( + 10000); // force flush every 10000 docs, this way we make sure that we 
only have a single + // segment for a totalNumberOfDocs < 1000 + indexWriterConfig.setRAMPerThreadHardLimitMB( + 1000); // 1000MB per thread, this way we make sure that no premature flush will occur + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); + IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = generateZerosVectorWithLastValue(dimension, 0); + for (int i = 1; i < totalNumberOfDocs + 1; i++) { + final float[] source = generateZerosVectorWithLastValue(dimension, i); + final Document doc = new Document(); + doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); + w.addDocument(doc); + } + log.info("Flushing docs to make them discoverable on the file system"); + w.commit(); + + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have a single segment with {} documents", totalNumberOfDocs); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + float expectedMinScoreInTopK = + VectorSimilarityFunction.EUCLIDEAN.compare( + target, + new float[] { + 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f, + 0.0f, 0.0f, k + }); + + // Query with essentially no reranking and expect recall to be very low + KnnFloatVectorQuery knnFloatVectorQuery = + getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery, 1); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + + final float recallWithLowOverqueryFactor = calculateRecall(topDocs, expectedMinScoreInTopK); + + // Query with reranking and expect recall to be high + knnFloatVectorQuery = + getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery, 5); + topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + float recallWithHighOverqueryFactor = calculateRecall(topDocs, expectedMinScoreInTopK); + Assert.assertTrue(recallWithLowOverqueryFactor <= recallWithHighOverqueryFactor); + + log.info("successfully completed search tests"); + } + } + } + + /** + * Test the simple case of quantization where we have the perfect batch single batch size each + * time with a merge of multiple segments + */ + @Test + public void testJVectorKnnIndex_happyCase_withQuantization_multipleSegments() throws IOException { + final int dimension = 16; + final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; + final int k = + 50; // The number of nearest neighbours to gather, we set a high number here to avoid an + // inaccurate result and + // jittery tests + final int perfectBatchSize = + DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; // MINIMUM_BATCH_SIZE_FOR_QUANTIZATION is the + // minimal + // batch size that will trigger a quantization without + // breaking it, generally speaking the batch size can't be + // lower than the number of clusters + final int totalNumberOfDocs = perfectBatchSize * 2; + + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec(DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION)); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + // We set the below parameters to make sure no permature flush 
will occur, this way we can have + // a single segment, and we can force + // test the quantization case + indexWriterConfig.setMaxBufferedDocs( + 10000); // force flush every 10000 docs, this way we make sure that we only have a single + // segment for a totalNumberOfDocs < 1000 + indexWriterConfig.setRAMPerThreadHardLimitMB( + 1000); // 1000MB per thread, this way we make sure that no premature flush will occur + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); + IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = generateZerosVectorWithLastValue(dimension, 0); + final float[][] vectors = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); + final Set groundTruthVectorsIds = + calculateGroundTruthVectorsIds(target, vectors, k, vectorSimilarityFunction); + + for (int i = 0; i < vectors.length; i++) { + final Document doc = new Document(); + doc.add(new KnnFloatVectorField(TEST_FIELD, vectors[i], vectorSimilarityFunction)); + doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); + w.addDocument(doc); + if (i % perfectBatchSize == 0) { + w.commit(); } - - Assert.assertTrue("No graph merge time recorded", KNNCounter.KNN_GRAPH_MERGE_TIME.getCount() > 0); + } + log.info("Flushing docs to make them discoverable on the file system"); + w.forceMerge(1); + + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have a single segment with {} documents", totalNumberOfDocs); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = + getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); + Assert.assertEquals(1.0f, recall, 0.05f); + log.info("successfully completed search tests"); + } } - - /** - * Calculate the recall for the top k documents - * For simplicity we assume that all documents have unique scores and therefore the minimum score in the top k documents is the kth document - * @param topDocs the top documents returned by the search - * @param minScoreInTopK the minimum score in the top k documents - * @return the recall of the top k documents - */ - private float calculateRecall(TopDocs topDocs, float minScoreInTopK) { - int totalRelevantDocs = 0; - for (int i = 0; i < topDocs.scoreDocs.length; i++) { - if (topDocs.scoreDocs[i].score >= minScoreInTopK) { - totalRelevantDocs++; - } + } + + /** + * Test the non-ideal case where batch sizes are not perfect and are lower than the number of + * recommended clusters in the index The expected behavior is for the quantization to only kick in + * when we have a merge or batch size that is bigger than the minimal required batch size + */ + @Test + public void testJVectorKnnIndex_mixedBatchSizes_withQuantization_multipleMerges() + throws IOException { + final int k = + 50; // The number of nearest neighbours to gather, we set a high number here to avoid an + // inaccurate result and + // jittery tests + final int dimension = 16; + final int notIdealBatchSize = + DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION + / 3; // Batch size that is not ideal for quantization and + // shouldn't 
trigger it + final int totalNumberOfDocs = + notIdealBatchSize + * 3; // 3 batches of documents each will result in quantization only when the merge + // is triggered, and we have a batch size of {@link + // MINIMUM_BATCH_SIZE_FOR_QUANTIZATION} as a result of merging all the smaller + // batches + final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(false); + indexWriterConfig.setCodec(getCodec(DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION)); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); + // We set the below parameters to make sure no permature flush will occur, this way we can have + // a single segment, and we can force + // test the quantization case + indexWriterConfig.setMaxBufferedDocs( + 10000); // force flush every 10000 docs, this way we make sure that we only have a single + // segment for a totalNumberOfDocs < 1000 + indexWriterConfig.setRAMPerThreadHardLimitMB( + 1000); // 1000MB per thread, this way we make sure that no premature flush will occur + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); + IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = generateZerosVectorWithLastValue(dimension, 0); + final float[][] vectors = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); + final Set groundTruthVectorsIds = + calculateGroundTruthVectorsIds(target, vectors, k, vectorSimilarityFunction); + for (int i = 0; i < totalNumberOfDocs; i++) { + final float[] source = vectors[i]; + final Document doc = new Document(); + doc.add(new KnnFloatVectorField("test_field", source, vectorSimilarityFunction)); + doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); + w.addDocument(doc); + if (i % notIdealBatchSize == 0) { + w.commit(); } - float recall = ((float) totalRelevantDocs) / ((float) topDocs.scoreDocs.length); - - if (recall == 0.0f) { - log.info( - "Recall is 0.0, this is probably not correct, here is some debug information\n topDocs: {}, minScoreInTopK: {}, totalRelevantDocs: {}", - topDocsToString(topDocs), - minScoreInTopK, - totalRelevantDocs - ); + } + log.info("Flushing docs to make them discoverable on the file system"); + w.forceMerge(1); + + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have a single segment with {} documents", totalNumberOfDocs); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = + getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); + Assert.assertEquals(1.0f, recall, 0.05f); + log.info("successfully completed search tests"); + } + } + } + + /** + * Test the non-ideal case where batch sizes are not perfect and are lower than the number of + * recommended clusters in the index The expected behavior is for the quantization to only kick in + * when we have a merge or batch size that is bigger than the minimal required batch size Also + * this is adding the compound file to the mix + */ + @Test + 
public void testJVectorKnnIndex_mixedBatchSizes_withQuantization_multipleMerges_withCompoundFile() + throws IOException { + final int k = + 50; // The number of nearest neighbours to gather, we set a high number here to avoid an + // inaccurate result and + // jittery tests + final int dimension = 16; + final int notIdealBatchSize = + DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION + / 3; // Batch size that is not ideal for quantization and + // shouldn't trigger it + final int totalNumberOfDocs = + notIdealBatchSize + * 10; // 3 batches of documents each will result in quantization only when the merge + // is triggered, and we have a batch size of {@link MINIMUM_BATCH_SIZE_FOR_QUANTIZATION} + // as a result of merging all the smaller batches + final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; + + boolean useCompoundFile = true; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(useCompoundFile); + indexWriterConfig.setCodec(getCodec(DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION)); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(useCompoundFile)); + // We set the below parameters to make sure no premature flush will occur, this way we can have + // a single segment, and we can force + // test the quantization case + indexWriterConfig.setMaxBufferedDocs( + 10000); // force flush every 10000 docs, this way we make sure that we only have a single + // segment for a totalNumberOfDocs < 1000 + indexWriterConfig.setRAMPerThreadHardLimitMB( + 1000); // 1000MB per thread, this way we make sure that no premature flush will occur + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); + IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = generateZerosVectorWithLastValue(dimension, 0); + // We will use random vectors because otherwise PQ will have a correlated subspaces which will + // result in a broken linear graph + final float[][] vectors = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); + final Set groundTruthVectorsIds = + calculateGroundTruthVectorsIds(target, vectors, k, vectorSimilarityFunction); + for (int i = 0; i < totalNumberOfDocs; i++) { + final float[] source = vectors[i]; + final Document doc = new Document(); + doc.add(new KnnFloatVectorField(TEST_FIELD, source, vectorSimilarityFunction)); + doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); + w.addDocument(doc); + if (i % notIdealBatchSize == 0) { + w.commit(); } - return recall; + } + w.commit(); + log.info("Flushing docs to make them discoverable on the file system"); + w.forceMerge(1); + + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have a single segment with {} documents", totalNumberOfDocs); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = + getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery, 1000); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); + Assert.assertEquals( + "Expected to have recall of 1.0+/-0.05 but got " + recall, 1.0f, recall, 0.05f); + 
log.info("successfully completed search tests"); + } } - // convert topDocs to a pretty printed string - private String topDocsToString(TopDocs topDocs) { - StringBuilder sb = new StringBuilder(); - sb.append("TopDocs: ["); - for (int i = 0; i < topDocs.scoreDocs.length; i++) { - sb.append(topDocs.scoreDocs[i].doc).append(" (").append(topDocs.scoreDocs[i].score).append("), "); + Assert.assertTrue( + "No quantization time recorded", KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.getCount() > 0); + Assert.assertTrue( + "No graph merge time recorded", KNNCounter.KNN_GRAPH_MERGE_TIME.getCount() > 0); + } + + /** + * We will use multiple batches, each can trigger a quantization and later merge them in an + * appending order to keep track of refinement + */ + @Test + public void testJVectorKnnIndex_withQuantization_withCompoundFile_with_refinement() + throws IOException { + final int k = + 50; // The number of nearest neighbours to gather, we set a high number here to avoid an + // inaccurate result and + // jittery tests + final int dimension = 16; + final int idealBatchSize = + DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; // Batch size that is not ideal for + // quantization and + // shouldn't trigger it + final int totalNumberOfDocs = + idealBatchSize * 10; // 10 batches, each batch on it's own will trigger quantization + final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; + + boolean useCompoundFile = true; + IndexWriterConfig indexWriterConfig = LuceneTestCase.newIndexWriterConfig(); + indexWriterConfig.setUseCompoundFile(useCompoundFile); + indexWriterConfig.setCodec(getCodec(DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION)); + indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(useCompoundFile)); + // We set the below parameters to make sure no premature flush will occur, this way we can have + // a single segment, and we can force + // test the quantization case + indexWriterConfig.setMaxBufferedDocs( + 10000); // force flush every 10000 docs, this way we make sure that we only have a single + // segment for a totalNumberOfDocs < 1000 + indexWriterConfig.setRAMPerThreadHardLimitMB( + 1000); // 1000MB per thread, this way we make sure that no premature flush will occur + final Path indexPath = createTempDir(); + log.info("Index path: {}", indexPath); + try (FSDirectory dir = FSDirectory.open(indexPath); + IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { + final float[] target = generateZerosVectorWithLastValue(dimension, 0); + // We will use random vectors because otherwise PQ will have a correlated subspaces which will + // result in a broken linear graph + final float[][] vectors = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); + final Set groundTruthVectorsIds = + calculateGroundTruthVectorsIds(target, vectors, k, vectorSimilarityFunction); + for (int i = 0; i < totalNumberOfDocs; i++) { + final float[] source = vectors[i]; + final Document doc = new Document(); + doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); + doc.add(new KnnFloatVectorField(TEST_FIELD, source, vectorSimilarityFunction)); + w.addDocument(doc); + if (i % idealBatchSize == 0) { + final long beforeTrainingTime = KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.getCount(); + w.commit(); + w.forceMerge(1); // force merge will trigger PQ refinement if other segments are present + final long afterTrainingTime = KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.getCount(); + Assert.assertTrue( + "Expected to have a training time of at least " + + 
beforeTrainingTime + + " but got " + + afterTrainingTime, + afterTrainingTime >= beforeTrainingTime); } - sb.append("]"); - return sb.toString(); + } + w.commit(); + log.info("Flushing docs to make them discoverable on the file system"); + w.forceMerge(1); + + try (IndexReader reader = DirectoryReader.open(w)) { + log.info("We should now have a single segment with {} documents", totalNumberOfDocs); + Assert.assertEquals(1, reader.getContext().leaves().size()); + Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); + + final Query filterQuery = new MatchAllDocsQuery(); + final IndexSearcher searcher = newSearcher(reader); + KnnFloatVectorQuery knnFloatVectorQuery = + getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery, 1000); + TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); + assertEquals(k, topDocs.totalHits.value()); + final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); + Assert.assertEquals( + "Expected to have recall of 1.0+/-0.05 but got " + recall, 1.0f, recall, 0.05f); + log.info("successfully completed search tests"); + } } - private JVectorKnnFloatVectorQuery getJVectorKnnFloatVectorQuery(String fieldName, float[] target, int k, Query filterQuery) { - return getJVectorKnnFloatVectorQuery(fieldName, target, k, filterQuery, KNNConstants.DEFAULT_OVER_QUERY_FACTOR); + Assert.assertTrue( + "No graph merge time recorded", KNNCounter.KNN_GRAPH_MERGE_TIME.getCount() > 0); + } + + /** + * Calculate the recall for the top k documents For simplicity we assume that all documents have + * unique scores and therefore the minimum score in the top k documents is the kth document + * + * @param topDocs the top documents returned by the search + * @param minScoreInTopK the minimum score in the top k documents + * @return the recall of the top k documents + */ + private float calculateRecall(TopDocs topDocs, float minScoreInTopK) { + int totalRelevantDocs = 0; + for (int i = 0; i < topDocs.scoreDocs.length; i++) { + if (topDocs.scoreDocs[i].score >= minScoreInTopK) { + totalRelevantDocs++; + } } - - private JVectorKnnFloatVectorQuery getJVectorKnnFloatVectorQuery( - String fieldName, - float[] target, - int k, - Query filterQuery, - int overQueryFactor - ) { - return new JVectorKnnFloatVectorQuery( - fieldName, - target, - k, - filterQuery, - overQueryFactor, - KNNConstants.DEFAULT_QUERY_SIMILARITY_THRESHOLD.floatValue(), - KNNConstants.DEFAULT_QUERY_RERANK_FLOOR.floatValue(), - KNNConstants.DEFAULT_QUERY_USE_PRUNING - ); + float recall = ((float) totalRelevantDocs) / ((float) topDocs.scoreDocs.length); + + if (recall == 0.0f) { + log.info( + "Recall is 0.0, this is probably not correct, here is some debug information\n topDocs: {}, minScoreInTopK: {}, totalRelevantDocs: {}", + topDocsToString(topDocs), + minScoreInTopK, + totalRelevantDocs); + } + return recall; + } + + // convert topDocs to a pretty printed string + private String topDocsToString(TopDocs topDocs) { + StringBuilder sb = new StringBuilder(); + sb.append("TopDocs: ["); + for (int i = 0; i < topDocs.scoreDocs.length; i++) { + sb.append(topDocs.scoreDocs[i].doc) + .append(" (") + .append(topDocs.scoreDocs[i].score) + .append("), "); + } + sb.append("]"); + return sb.toString(); + } + + private JVectorKnnFloatVectorQuery getJVectorKnnFloatVectorQuery( + String fieldName, float[] target, int k, Query filterQuery) { + return getJVectorKnnFloatVectorQuery( + fieldName, target, k, filterQuery, KNNConstants.DEFAULT_OVER_QUERY_FACTOR); + } + + private JVectorKnnFloatVectorQuery 
getJVectorKnnFloatVectorQuery( + String fieldName, float[] target, int k, Query filterQuery, int overQueryFactor) { + return new JVectorKnnFloatVectorQuery( + fieldName, + target, + k, + filterQuery, + overQueryFactor, + KNNConstants.DEFAULT_QUERY_SIMILARITY_THRESHOLD.floatValue(), + KNNConstants.DEFAULT_QUERY_RERANK_FLOOR.floatValue(), + KNNConstants.DEFAULT_QUERY_USE_PRUNING); + } + + private static float[][] getMonotonicallyIncreasingVectors(int numVectors, int vectorDimension) { + float[][] vectors = new float[numVectors][vectorDimension]; + for (int i = 0; i < numVectors; i++) { + vectors[i] = generateZerosVectorWithLastValue(vectorDimension, i); } - private static float[][] getMonotonicallyIncreasingVectors(int numVectors, int vectorDimension) { - float[][] vectors = new float[numVectors][vectorDimension]; - for (int i = 0; i < numVectors; i++) { - vectors[i] = generateZerosVectorWithLastValue(vectorDimension, i); - } + return vectors; + } - return vectors; + private static float[] generateZerosVectorWithLastValue(int vectorDimension, int lastValue) { + float[] vector = new float[vectorDimension]; + for (int i = 0; i < vectorDimension - 1; i++) { + vector[i] = 0; } - - private static float[] generateZerosVectorWithLastValue(int vectorDimension, int lastValue) { - float[] vector = new float[vectorDimension]; - for (int i = 0; i < vectorDimension - 1; i++) { - vector[i] = 0; - } - vector[vectorDimension - 1] = lastValue; - return vector; + vector[vectorDimension - 1] = lastValue; + return vector; + } + + private static float calculateRecall( + IndexReader reader, Set groundTruthVectorsIds, TopDocs topDocs, int k) + throws IOException { + final ScoreDoc[] scoreDocs = topDocs.scoreDocs; + Assert.assertEquals(groundTruthVectorsIds.size(), scoreDocs.length); + int totalRelevantDocs = 0; + for (ScoreDoc scoreDoc : scoreDocs) { + final int id = + reader + .storedFields() + .document(scoreDoc.doc) + .getField(TEST_ID_FIELD) + .storedValue() + .getIntValue(); + if (groundTruthVectorsIds.contains(id)) { + totalRelevantDocs++; + } } - - private static float calculateRecall(IndexReader reader, Set groundTruthVectorsIds, TopDocs topDocs, int k) - throws IOException { - final ScoreDoc[] scoreDocs = topDocs.scoreDocs; - Assert.assertEquals(groundTruthVectorsIds.size(), scoreDocs.length); - int totalRelevantDocs = 0; - for (ScoreDoc scoreDoc : scoreDocs) { - final int id = reader.storedFields().document(scoreDoc.doc).getField(TEST_ID_FIELD).storedValue().getIntValue(); - if (groundTruthVectorsIds.contains(id)) { - totalRelevantDocs++; - } + return ((float) totalRelevantDocs) / ((float) k); + } + + /** + * Find the IDs of the ground truth vectors in the dataset + * + * @param query query vector + * @param dataset dataset of all the vectors with their ordinal position in the array as their ID + * @param k the number of expected results + * @return the IDs of the ground truth vectors in the dataset + */ + private static Set calculateGroundTruthVectorsIds( + float[] query, + final float[][] dataset, + int k, + VectorSimilarityFunction vectorSimilarityFunction) { + final Set groundTruthVectorsIds = new HashSet<>(); + final PriorityQueue priorityQueue = + new PriorityQueue<>(k, (o1, o2) -> Float.compare(o1.score, o2.score)); + for (int i = 0; i < dataset.length; i++) { + ScoreDoc scoreDoc = new ScoreDoc(i, vectorSimilarityFunction.compare(query, dataset[i])); + if (priorityQueue.size() >= k) { + final ScoreDoc top = priorityQueue.poll(); + if (top.score < scoreDoc.score) { + priorityQueue.add(scoreDoc); + 
} else { + priorityQueue.add(top); } - return ((float) totalRelevantDocs) / ((float) k); + } else { + priorityQueue.add(scoreDoc); + } } - - /** - * Find the IDs of the ground truth vectors in the dataset - * @param query query vector - * @param dataset dataset of all the vectors with their ordinal position in the array as their ID - * @param k the number of expected results - * @return the IDs of the ground truth vectors in the dataset - */ - private static Set calculateGroundTruthVectorsIds( - float[] query, - final float[][] dataset, - int k, - VectorSimilarityFunction vectorSimilarityFunction - ) { - final Set groundTruthVectorsIds = new HashSet<>(); - final PriorityQueue priorityQueue = new PriorityQueue<>(k, (o1, o2) -> Float.compare(o1.score, o2.score)); - for (int i = 0; i < dataset.length; i++) { - ScoreDoc scoreDoc = new ScoreDoc(i, vectorSimilarityFunction.compare(query, dataset[i])); - if (priorityQueue.size() >= k) { - final ScoreDoc top = priorityQueue.poll(); - if (top.score < scoreDoc.score) { - priorityQueue.add(scoreDoc); - } else { - priorityQueue.add(top); - } - } else { - priorityQueue.add(scoreDoc); - } - } - while (!priorityQueue.isEmpty()) { - groundTruthVectorsIds.add(priorityQueue.poll().doc); - } - - return groundTruthVectorsIds; + while (!priorityQueue.isEmpty()) { + groundTruthVectorsIds.add(priorityQueue.poll().doc); } + + return groundTruthVectorsIds; + } } From 3b98cd8626100b28ea28eb0709bb90cd02fdced9 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 15:24:22 +0000 Subject: [PATCH 05/86] Fix package declarations --- .../sandbox/codecs/jvector/ForceMergesOnlyMergePolicy.java | 2 +- .../lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java | 2 +- .../lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java | 2 +- .../org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java | 2 +- .../lucene/sandbox/codecs/jvector/JVectorIndexWriter.java | 2 +- .../lucene/sandbox/codecs/jvector/JVectorKnnCollector.java | 2 +- .../sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java | 2 +- .../sandbox/codecs/jvector/JVectorRandomAccessReader.java | 2 +- .../org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java | 2 +- .../lucene/sandbox/codecs/jvector/JVectorVectorScorer.java | 2 +- .../org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java | 2 +- .../apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java | 2 +- 12 files changed, 12 insertions(+), 12 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/ForceMergesOnlyMergePolicy.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/ForceMergesOnlyMergePolicy.java index d43e7e4ac80f..2e74da91c8d0 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/ForceMergesOnlyMergePolicy.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/ForceMergesOnlyMergePolicy.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.opensearch.knn.index.codec.jvector; +package org.apache.lucene.sandbox.codecs.jvector; import java.io.IOException; import java.util.List; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java index ce6050088d68..97daea71c3ab 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.opensearch.knn.index.codec.jvector; +package org.apache.lucene.sandbox.codecs.jvector; import java.io.IOException; import java.util.Arrays; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java index 5dba75410ac0..c4039c6d12b9 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.opensearch.knn.index.codec.jvector; +package org.apache.lucene.sandbox.codecs.jvector; import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex; import io.github.jbellis.jvector.util.Bits; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java index a7f3a13ee865..1f6ee2b93080 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.opensearch.knn.index.codec.jvector; +package org.apache.lucene.sandbox.codecs.jvector; import java.io.IOException; import java.util.concurrent.ForkJoinPool; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java index 3a99635582a7..70217c1f1f25 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.opensearch.knn.index.codec.jvector; +package org.apache.lucene.sandbox.codecs.jvector; import io.github.jbellis.jvector.disk.IndexWriter; import java.io.IOException; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java index c5490349ef0a..8051e967e884 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.opensearch.knn.index.codec.jvector; +package org.apache.lucene.sandbox.codecs.jvector; import lombok.Value; import org.apache.lucene.search.KnnCollector; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java index d2ece0b9eebc..f8903d67bde5 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.opensearch.knn.index.codec.jvector; +package org.apache.lucene.sandbox.codecs.jvector; import java.io.IOException; import org.apache.lucene.index.FloatVectorValues; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java index 25f49a897c76..8394fa1c9ada 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.opensearch.knn.index.codec.jvector; +package org.apache.lucene.sandbox.codecs.jvector; import io.github.jbellis.jvector.disk.RandomAccessReader; import io.github.jbellis.jvector.disk.ReaderSupplier; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 95a98830ff5d..6cbd237c9b15 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.opensearch.knn.index.codec.jvector; +package org.apache.lucene.sandbox.codecs.jvector; import io.github.jbellis.jvector.disk.RandomAccessReader; import io.github.jbellis.jvector.disk.ReaderSupplier; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java index 6b7937f51525..cc6f3e6d6bff 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.opensearch.knn.index.codec.jvector; +package org.apache.lucene.sandbox.codecs.jvector; import io.github.jbellis.jvector.vector.VectorSimilarityFunction; import io.github.jbellis.jvector.vector.types.VectorFloat; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 9b17c6165dfd..0911e2f0d524 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -15,7 +15,7 @@ * limitations under the License. 
*/ -package org.opensearch.knn.index.codec.jvector; +package org.apache.lucene.sandbox.codecs.jvector; import static io.github.jbellis.jvector.quantization.KMeansPlusPlusClusterer.UNWEIGHTED; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding; diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java index b2f2ea075d3d..77949d7d039e 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.opensearch.knn.index.codec.jvector; +package org.apache.lucene.sandbox.codecs.jvector; import static org.opensearch.knn.common.KNNConstants.DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; import static org.opensearch.knn.index.engine.CommonTestUtils.getCodec; From 8f00f87707c4b92bc47dd0abe6779d67b87ff9af Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 16:03:38 +0000 Subject: [PATCH 06/86] Remove logging --- .../codecs/jvector/GraphNodeIdToDocMap.java | 10 +- .../sandbox/codecs/jvector/JVectorFormat.java | 5 - .../codecs/jvector/JVectorIndexWriter.java | 2 - .../jvector/JVectorRandomAccessReader.java | 4 - .../sandbox/codecs/jvector/JVectorReader.java | 18 +-- .../sandbox/codecs/jvector/JVectorWriter.java | 132 ++-------------- .../codecs/jvector/KNNJVectorTests.java | 149 +++--------------- 7 files changed, 39 insertions(+), 281 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java index 97daea71c3ab..0bd8febec442 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java @@ -19,7 +19,6 @@ import java.io.IOException; import java.util.Arrays; -import lombok.extern.log4j.Log4j2; import org.apache.lucene.index.Sorter; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -33,7 +32,6 @@ * *

Which means that we also need to persist this mapping to disk to be available across merges. */ -@Log4j2 public class GraphNodeIdToDocMap { private static final int VERSION = 1; private int[] graphNodeIdsToDocIds; @@ -88,12 +86,8 @@ public GraphNodeIdToDocMap(int[] graphNodeIdsToDocIds) { + " is less than the number of ordinals " + graphNodeIdsToDocIds.length); } - if (maxDocId > graphNodeIdsToDocIds.length) { - log.warn( - "Max doc id {} is greater than the number of ordinals {}, this implies a lot of deleted documents. Or that some documents are missing vectors. Wasting a lot of memory", - maxDocId, - graphNodeIdsToDocIds.length); - } + // When maxDocId > graphNodeIdsToDocIds.length, there are lots of deleted documents or missing + // values, which wastes memory this.docIdsToGraphNodeIds = new int[maxDocs]; Arrays.fill(this.docIdsToGraphNodeIds, -1); // -1 means no mapping to ordinal for (int ord = 0; ord < graphNodeIdsToDocIds.length; ord++) { diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java index 1f6ee2b93080..020a82835d60 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java @@ -21,7 +21,6 @@ import java.util.concurrent.ForkJoinPool; import java.util.concurrent.ForkJoinWorkerThread; import java.util.function.Function; -import lombok.extern.log4j.Log4j2; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.KnnVectorsWriter; @@ -29,7 +28,6 @@ import org.apache.lucene.index.SegmentWriteState; import org.opensearch.knn.common.KNNConstants; -@Log4j2 public class JVectorFormat extends KnnVectorsFormat { public static final String NAME = "JVectorFormat"; public static final String META_CODEC_NAME = "JVectorVectorsFormatMeta"; @@ -206,9 +204,6 @@ public static ForkJoinPool getPhysicalCoreExecutor() { return thread; }; - log.info( - "Creating SIMD ForkJoinPool with {} physical cores for JVector SIMD operations", - estimatedPhysicalCoreCount); return new ForkJoinPool(estimatedPhysicalCoreCount, factory, null, true); } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java index 70217c1f1f25..6483d7c71393 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java @@ -19,14 +19,12 @@ import io.github.jbellis.jvector.disk.IndexWriter; import java.io.IOException; -import lombok.extern.log4j.Log4j2; import org.apache.lucene.store.IndexOutput; /** * JVectorRandomAccessWriter is a wrapper around IndexOutput that implements RandomAccessWriter. * Note: This is not thread safe! 
*/ -@Log4j2 public class JVectorIndexWriter implements IndexWriter { private final IndexOutput indexOutputDelegate; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java index 8394fa1c9ada..97f7cec66dec 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java @@ -25,11 +25,9 @@ import java.nio.FloatBuffer; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.atomic.AtomicInteger; -import lombok.extern.log4j.Log4j2; import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.IOUtils; -@Log4j2 public class JVectorRandomAccessReader implements RandomAccessReader { private final byte[] internalBuffer = new byte[Long.BYTES]; private final byte[] internalFloatBuffer = new byte[Float.BYTES]; @@ -121,10 +119,8 @@ public void read(float[] floats, int offset, int count) throws IOException { @Override public void close() throws IOException { - log.debug("Closing JVectorRandomAccessReader for file: {}", indexInputDelegate); this.closed = true; // no need to really close the index input delegate since it is a clone - log.debug("Closed JVectorRandomAccessReader for file: {}", indexInputDelegate); } @Override diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 6cbd237c9b15..8110937aec99 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -37,7 +37,6 @@ import java.util.List; import java.util.Map; import java.util.Optional; -import lombok.extern.log4j.Log4j2; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.index.*; @@ -48,7 +47,6 @@ import org.opensearch.knn.common.KNNConstants; import org.opensearch.knn.plugin.stats.KNNCounter; -@Log4j2 public class JVectorReader extends KnnVectorsReader { private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport(); @@ -146,8 +144,8 @@ public void search(String field, float[] target, KnnCollector knnCollector, Bits if (knnCollector instanceof JVectorKnnCollector) { jvectorKnnCollector = (JVectorKnnCollector) knnCollector; } else { - log.warn( - "KnnCollector must be of type JVectorKnnCollector, for now we will re-wrap it but this is not ideal"); + // KnnCollector must be of type JVectorKnnCollector, for now we will re-wrap it but this is + // not ideal jvectorKnnCollector = new JVectorKnnCollector( knnCollector, @@ -199,7 +197,6 @@ public void search(String field, float[] target, KnnCollector knnCollector, Bits } final long graphSearchEnd = System.currentTimeMillis(); final long searchTime = graphSearchEnd - graphSearchStart; - log.debug("Search (including acquiring view) took {} ms", searchTime); // Collect the below metrics about the search and somehow wire this back to {@link // @KNNStats} @@ -214,12 +211,6 @@ public void search(String field, float[] target, KnnCollector knnCollector, Bits KNNCounter.KNN_QUERY_EXPANDED_NODES.add(expandedCount); KNNCounter.KNN_QUERY_EXPANDED_BASE_LAYER_NODES.add(expandedBaseLayerCount); 
KNNCounter.KNN_QUERY_GRAPH_SEARCH_TIME.add(searchTime); - log.debug( - "rerankedCount: {}, visitedNodesCount: {}, expandedCount: {}, expandedBaseLayerCount: {}", - rerankedCount, - visitedNodesCount, - expandedCount, - expandedBaseLayerCount); } } } @@ -270,7 +261,6 @@ class FieldEntry implements Closeable { public FieldEntry( FieldInfo fieldInfo, JVectorWriter.VectorIndexFieldMetadata vectorIndexFieldMetadata) throws IOException { - this.fieldInfo = fieldInfo; this.similarityFunction = VectorSimilarityMapper.ordToDistFunc( vectorIndexFieldMetadata.getVectorSimilarityFunction().ordinal()); @@ -316,10 +306,6 @@ public FieldEntry( directory.openInput(vectorIndexFieldDataFileName, IOContext.READONCE), pqCodebooksAndVectorsOffset, pqCodebooksAndVectorsLength); - log.debug( - "Loading PQ codebooks and vectors for field {}, with numbers of vectors: {}", - fieldInfo.name, - state.segmentInfo.maxDoc()); try (final var randomAccessReader = pqCodebooksReaderSupplier.get()) { this.pqVectors = PQVectors.load(randomAccessReader); } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 0911e2f0d524..536f5f49517f 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -45,7 +45,6 @@ import lombok.Builder; import lombok.Getter; import lombok.Value; -import lombok.extern.log4j.Log4j2; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnFieldVectorsWriter; import org.apache.lucene.codecs.KnnVectorsReader; @@ -91,7 +90,6 @@ * MergeState.DocMap} provided in the {@link MergeState}. And across sorts with {@link * GraphNodeIdToDocMap#update(Sorter.DocMap)} during flushes. */ -@Log4j2 public class JVectorWriter extends KnnVectorsWriter { private static final long SHALLOW_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(JVectorWriter.class); @@ -178,16 +176,14 @@ public JVectorWriter( @Override public KnnFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOException { - log.info("Adding field {} in segment {}", fieldInfo.name, segmentWriteState.segmentInfo.name); if (fieldInfo.getVectorEncoding() == VectorEncoding.BYTE) { final String errorMessage = "byte[] vectors are not supported in JVector. " + "Instead you should only use float vectors and leverage product quantization during indexing." 
+ "This can provides much greater savings in storage and memory"; - log.error(errorMessage); throw new UnsupportedOperationException(errorMessage); } - FieldWriter newField = new FieldWriter<>(fieldInfo, segmentWriteState.segmentInfo.name); + FieldWriter newField = new FieldWriter<>(fieldInfo); fields.add(newField); return newField; @@ -195,8 +191,6 @@ public KnnFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOException @Override public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { - log.info( - "Merging field {} into segment {}", fieldInfo.name, segmentWriteState.segmentInfo.name); try { final long mergeStart = Clock.systemDefaultZone().millis(); switch (fieldInfo.getVectorEncoding()) { @@ -210,25 +204,13 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE final long mergeEnd = Clock.systemDefaultZone().millis(); final long mergeTime = mergeEnd - mergeStart; KNNCounter.KNN_GRAPH_MERGE_TIME.add(mergeTime); - log.info( - "Completed Merge field {} into segment {}", - fieldInfo.name, - segmentWriteState.segmentInfo.name); } catch (Exception e) { - log.error( - "Error merging field {} into segment {}", - fieldInfo.name, - segmentWriteState.segmentInfo.name, - e); throw e; } } @Override public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { - log.info("Flushing {} fields", fields.size()); - - log.info("Flushing jVector graph index"); for (FieldWriter field : fields) { final RandomAccessVectorValues randomAccessVectorValues = field.randomAccessVectorValues; final int[] newToOldOrds = new int[randomAccessVectorValues.size()]; @@ -239,17 +221,12 @@ public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { final PQVectors pqVectors; final FieldInfo fieldInfo = field.fieldInfo; if (randomAccessVectorValues.size() >= minimumBatchSizeForQuantization) { - log.info("Calculating codebooks and compressed vectors for field {}", fieldInfo.name); pqVectors = getPQVectors(newToOldOrds, randomAccessVectorValues, fieldInfo); buildScoreProvider = BuildScoreProvider.pqBuildScoreProvider( getVectorSimilarityFunction(fieldInfo), pqVectors); } else { - log.info( - "Vector count: {}, less than limit to trigger PQ quantization: {}, for field {}, will use full precision vectors instead.", - randomAccessVectorValues.size(), - minimumBatchSizeForQuantization, - fieldInfo.name); + // Not enough vectors for quantization; use full precision vectors instead pqVectors = null; buildScoreProvider = BuildScoreProvider.randomAccessScoreProvider( @@ -292,11 +269,6 @@ private void writeField( GraphNodeIdToDocMap graphNodeIdToDocMap, OnHeapGraphIndex graph) throws IOException { - log.info( - "Writing field {} with vector count: {}, for segment: {}", - fieldInfo.name, - randomAccessVectorValues.size(), - segmentWriteState.segmentInfo.name); final var vectorIndexFieldMetadata = writeGraph( graph, @@ -308,7 +280,6 @@ private void writeField( meta.writeInt(fieldInfo.number); vectorIndexFieldMetadata.toOutput(meta); - log.info("Writing neighbors score cache for field {}", fieldInfo.name); // field data file, which contains the graph final String neighborsScoreCacheIndexFieldFileName = baseDataFileName @@ -365,7 +336,6 @@ private VectorIndexFieldMetadata writeGraph( segmentWriteState.segmentSuffix); final long startOffset = indexOutput.getFilePointer(); - log.info("Writing graph to {}", vectorIndexFieldFileName); var resultBuilder = VectorIndexFieldMetadata.builder() .fieldNumber(fieldInfo.number) @@ -392,11 +362,6 @@ private 
VectorIndexFieldMetadata writeGraph( // If PQ is enabled and we have enough vectors, write the PQ codebooks and compressed // vectors if (pqVectors != null) { - log.info( - "Writing PQ codebooks and vectors for field {} since the size is {} >= {}", - fieldInfo.name, - randomAccessVectorValues.size(), - minimumBatchSizeForQuantization); resultBuilder.pqCodebooksAndVectorsOffset(endGraphOffset); // write the compressed vectors and codebooks to disk pqVectors.write(jVectorIndexWriter); @@ -415,13 +380,8 @@ private VectorIndexFieldMetadata writeGraph( private PQVectors getPQVectors( int[] newToOldOrds, RandomAccessVectorValues randomAccessVectorValues, FieldInfo fieldInfo) throws IOException { - final String fieldName = fieldInfo.name; final VectorSimilarityFunction vectorSimilarityFunction = fieldInfo.getVectorSimilarityFunction(); - log.info( - "Computing PQ codebooks for field {} for {} vectors", - fieldName, - randomAccessVectorValues.size()); final long start = Clock.systemDefaultZone().millis(); final var M = numberOfSubspacesPerVectorSupplier.apply(randomAccessVectorValues.dimension()); final var numberOfClustersPerSubspace = @@ -439,21 +399,11 @@ private PQVectors getPQVectors( final long end = Clock.systemDefaultZone().millis(); final long trainingTime = end - start; - log.info("Computed PQ codebooks for field {}, in {} millis", fieldName, trainingTime); KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.add(trainingTime); - log.info( - "Encoding and building PQ vectors for field {} for {} vectors", - fieldName, - randomAccessVectorValues.size()); // PQVectors pqVectors = pq.encodeAll(randomAccessVectorValues, SIMD_POOL); PQVectors pqVectors = PQVectors.encodeAndBuild( pq, newToOldOrds.length, newToOldOrds, randomAccessVectorValues, SIMD_POOL_MERGE); - log.info( - "Encoded and built PQ vectors for field {}, original size: {} bytes, compressed size: {} bytes", - fieldName, - pqVectors.getOriginalSize(), - pqVectors.getCompressedSize()); return pqVectors; } @@ -502,7 +452,6 @@ public VectorIndexFieldMetadata(IndexInput in) throws IOException { @Override public void finish() throws IOException { - log.info("Finishing segment {}", segmentWriteState.segmentInfo.name); if (finished) { throw new IllegalStateException("already finished"); } @@ -548,25 +497,21 @@ static class FieldWriter extends KnnFieldVectorsWriter { private final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(FieldWriter.class); @Getter private final FieldInfo fieldInfo; private int lastDocID = -1; - private final String segmentName; private final RandomAccessVectorValues randomAccessVectorValues; // The ordering of docIds matches the ordering of vectors, the index in this list corresponds to // the jVector ordinal private final List> vectors = new ArrayList<>(); private final List docIds = new ArrayList<>(); - FieldWriter(FieldInfo fieldInfo, String segmentName) { + FieldWriter(FieldInfo fieldInfo) { /** For creating a new field from a flat field vectors writer. 
*/ this.randomAccessVectorValues = new ListRandomAccessVectorValues(vectors, fieldInfo.getVectorDimension()); this.fieldInfo = fieldInfo; - this.segmentName = segmentName; } @Override public void addValue(int docID, T vectorValue) throws IOException { - log.trace( - "Adding value {} to field {} in segment {}", vectorValue, fieldInfo.name, segmentName); if (docID == lastDocID) { throw new IllegalArgumentException( "VectorValuesField \"" @@ -581,7 +526,6 @@ public void addValue(int docID, T vectorValue) throws IOException { "byte[] vectors are not supported in JVector. " + "Instead you should only use float vectors and leverage product quantization during indexing." + "This can provides much greater savings in storage and memory"; - log.error("{}", errorMessage); throw new UnsupportedOperationException(errorMessage); } else { throw new IllegalArgumentException("Unsupported vector type: " + vectorValue.getClass()); @@ -603,10 +547,6 @@ public long ramBytesUsed() { static io.github.jbellis.jvector.vector.VectorSimilarityFunction getVectorSimilarityFunction( FieldInfo fieldInfo) { - log.info( - "Matching vector similarity function {} for field {}", - fieldInfo.getVectorSimilarityFunction(), - fieldInfo.name); return switch (fieldInfo.getVectorSimilarityFunction()) { case EUCLIDEAN -> io.github.jbellis.jvector.vector.VectorSimilarityFunction.EUCLIDEAN; case COSINE -> io.github.jbellis.jvector.vector.VectorSimilarityFunction.COSINE; @@ -658,7 +598,6 @@ class RandomAccessMergedFloatVectorValues implements RandomAccessVectorValues { // Vector dimension private final int dimension; private final FieldInfo fieldInfo; - private final MergeState mergeState; private final GraphNodeIdToDocMap graphNodeIdToDocMap; private final int[] graphNodeIdsToRavvOrds; private boolean deletesFound = false; @@ -673,7 +612,6 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge throws IOException { this.totalDocsCount = Math.toIntExact(Arrays.stream(mergeState.maxDocs).asLongStream().sum()); this.fieldInfo = fieldInfo; - this.mergeState = mergeState; final String fieldName = fieldInfo.name; @@ -792,12 +730,7 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge for (int docId = it.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = it.nextDoc()) { - if (docMaps[readerIdx].get(docId) == -1) { - log.warn( - "Document {} in reader {} is not mapped to a global ordinal from the merge docMaps. Will skip this document for now", - docId, - readerIdx); - } else { + if (docMaps[readerIdx].get(docId) != -1) { // Mapping from ravv ordinals to [readerIndex, readerOrd] // Map graph node id to ravv ordinal // Map graph node id to doc id @@ -831,12 +764,7 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge docId != DocIdSetIterator.NO_MORE_DOCS; docId = leadingReaderIt.nextDoc()) { final int newGlobalDocId = docMaps[LEADING_READER_IDX].get(docId); - if (newGlobalDocId == -1) { - log.warn( - "Document {} in reader {} is not mapped to a global ordinal from the merge docMaps. 
Will skip this document for now", - docId, - LEADING_READER_IDX); - } else { + if (newGlobalDocId != -1) { final int ravvLocalOrd = leadingReaderIt.index(); final int ravvGlobalOrd = ravvLocalOrd + baseOrds[LEADING_READER_IDX]; graphNodeIdToDocIds[ravvLocalOrd] = newGlobalDocId; @@ -861,12 +789,7 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge for (int docId = it.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; docId = it.nextDoc()) { - if (docMaps[readerIdx].get(docId) == -1) { - log.warn( - "Document {} in reader {} is not mapped to a global ordinal from the merge docMaps. Will skip this document for now", - docId, - readerIdx); - } else { + if (docMaps[readerIdx].get(docId) != -1) { // Mapping from ravv ordinals to [readerIndex, readerOrd] // Map graph node id to ravv ordinal // Map graph node id to doc id @@ -896,10 +819,6 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge } this.graphNodeIdToDocMap = new GraphNodeIdToDocMap(graphNodeIdToDocIds); - log.debug( - "Created RandomAccessMergedFloatVectorValues with {} total vectors from {} readers", - size, - readers.length); } /** @@ -928,7 +847,6 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge public void merge() throws IOException { // This section creates the PQVectors to be used for this merge // Get PQ compressor for leading reader - final int totalVectorsCount = size; final String fieldName = fieldInfo.name; final PQVectors pqVectors; final OnHeapGraphIndex graph; @@ -941,30 +859,12 @@ public void merge() throws IOException { // remaining vectors if (leadingReader.getProductQuantizationForField(fieldInfo.name).isEmpty()) { // No pre-existing codebooks, check if we have enough vectors to trigger quantization - log.info( - "No Pre-existing PQ codebooks found in this merge for field {} in segment {}, will check if a new codebooks is necessary", - fieldName, - mergeState.segmentInfo.name); if (this.size() >= minimumBatchSizeForQuantization) { - log.info( - "Calculating new codebooks and compressed vectors for field: {}, with totalVectorCount: {}, above minimumBatchSizeForQuantization: {}", - fieldName, - totalVectorsCount, - minimumBatchSizeForQuantization); pqVectors = getPQVectors(graphNodeIdsToRavvOrds, this, fieldInfo); } else { - log.info( - "Not enough vectors found for field: {}, totalVectorCount: {}, is below minimumBatchSizeForQuantization: {}", - fieldName, - totalVectorsCount, - minimumBatchSizeForQuantization); pqVectors = null; } } else { - log.info( - "Pre-existing PQ codebooks found in this merge for field {} in segment {}, will refine the codebooks from the leading reader with the remaining vectors", - fieldName, - mergeState.segmentInfo.name); final long start = Clock.systemDefaultZone().millis(); ProductQuantization leadingCompressor = leadingReader.getProductQuantizationForField(fieldName).get(); @@ -980,7 +880,6 @@ public void merge() throws IOException { } final long end = Clock.systemDefaultZone().millis(); final long trainingTime = end - start; - log.info("Refined PQ codebooks for field {}, in {} millis", fieldName, trainingTime); KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.add(trainingTime); pqVectors = PQVectors.encodeAndBuild( @@ -998,11 +897,7 @@ public void merge() throws IOException { // graph = getGraph(buildScoreProvider, this, newToOldOrds, fieldInfo, // segmentWriteState.segmentInfo.name); if (!deletesFound) { - final String segmentName = segmentWriteState.segmentInfo.name; - log.info( - "No deletes 
found, and no PQ codebooks found, expanding previous graph with additional vectors for field {} in segment {}", - fieldName, - segmentName); + // Expand graph when there are no deletes and no PQ codebooks final RandomAccessReader leadingOnHeapGraphReader = leadingReader.getNeighborsScoreCacheForField(fieldName); final int numBaseVectors = leadingReader.getFloatVectorValues(fieldName).size(); @@ -1019,7 +914,7 @@ public void merge() throws IOException { alpha, hierarchyEnabled); } else { - log.info("Deletes found, and no PQ codebooks found, building new graph from scratch"); + // Build a new graph from scratch when there are deletes and no PQ codebooks graph = getGraph( buildScoreProvider, @@ -1030,7 +925,7 @@ public void merge() throws IOException { SIMD_POOL_MERGE); } } else { - log.info("PQ codebooks found, building graph from scratch with PQ vectors"); + // Re-use PQ codebooks to build a new graph from scratch buildScoreProvider = BuildScoreProvider.pqBuildScoreProvider( getVectorSimilarityFunction(fieldInfo), pqVectors); @@ -1114,11 +1009,9 @@ public OnHeapGraphIndex getGraph( * To have the right mapping from docId to vector ordinal we need to use the mergedFloatVector. * This is the case when we are merging segments and we might have more documents than vectors. */ - final long start = Clock.systemDefaultZone().millis(); final OnHeapGraphIndex graphIndex; var vv = randomAccessVectorValues.threadLocalSupplier(); - log.info("Building graph from merged float vector"); // parallel graph construction from the merge documents Ids SIMD_POOL .submit( @@ -1133,13 +1026,7 @@ public OnHeapGraphIndex getGraph( .join(); graphIndexBuilder.cleanup(); graphIndex = (OnHeapGraphIndex) graphIndexBuilder.getGraph(); - final long end = Clock.systemDefaultZone().millis(); - log.info( - "Built graph for field {} in segment {} in {} millis", - fieldInfo.name, - segmentName, - end - start); return graphIndex; } @@ -1173,7 +1060,6 @@ public VectorFloat getVector(int nodeId) { return VECTOR_TYPE_SUPPORT.createFloatVector(copy); } } catch (IOException e) { - log.error("Error retrieving vector at ordinal {}", nodeId, e); throw new RuntimeException(e); } } diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java index 77949d7d039e..e07f6519a18a 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java @@ -27,7 +27,6 @@ import java.util.concurrent.*; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; -import lombok.extern.log4j.Log4j2; import org.apache.lucene.document.*; import org.apache.lucene.index.*; import org.apache.lucene.search.*; @@ -51,8 +50,6 @@ @ThreadLeakFilters( defaultFilters = true, filters = {ThreadLeakFiltersForTests.class}) -@LuceneTestCase.SuppressSysoutChecks(bugUrl = "") -@Log4j2 public class KNNJVectorTests extends LuceneTestCase { private static final String TEST_FIELD = "test_field"; private static final String TEST_ID_FIELD = "id"; @@ -71,7 +68,6 @@ public void testJVectorKnnIndex_simpleCase() throws IOException { indexWriterConfig.setCodec(getCodec()); indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new 
IndexWriter(dir, indexWriterConfig)) { final float[] target = new float[] {0.0f, 0.0f}; @@ -81,11 +77,11 @@ public void testJVectorKnnIndex_simpleCase() throws IOException { doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); w.addDocument(doc); } - log.info("Flushing docs to make them discoverable on the file system"); + // Flush docs to make them discoverable on the file system w.commit(); try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have a single segment with 10 documents"); + // We should now have a single segment with 10 documents; Assert.assertEquals(1, reader.getContext().leaves().size()); Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); @@ -110,10 +106,8 @@ public void testJVectorKnnIndex_simpleCase() throws IOException { VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 1.0f / 8.0f}), topDocs.scoreDocs[2].score, 0.001f); - log.info("successfully completed search tests"); } } - log.info("successfully closed directory"); } /** Test the scenario when not all documents are populated with the vector field */ @@ -126,7 +120,6 @@ public void testMissing_fields() throws IOException { indexWriterConfig.setCodec(getCodec()); indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { final float[] target = new float[] {0.0f, 0.0f}; @@ -139,11 +132,11 @@ public void testMissing_fields() throws IOException { doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); w.addDocument(doc); } - log.info("Flushing docs to make them discoverable on the file system"); + // Flush docs to make them discoverable on the file system w.commit(); try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have a single segment with 10 documents"); + // We should now have a single segment with 10 documents Assert.assertEquals(1, reader.getContext().leaves().size()); Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); @@ -168,10 +161,8 @@ public void testMissing_fields() throws IOException { VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 4.0f}), topDocs.scoreDocs[2].score, 0.001f); - log.info("successfully completed search tests"); } } - log.info("successfully closed directory"); } /** @@ -194,7 +185,6 @@ public void test_sorted_index() throws IOException { new Sort(new SortField(sortFieldName, SortField.Type.INT, true))); // true = reverse order final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { final float[] target = new float[] {0.0f, 0.0f}; @@ -207,11 +197,11 @@ public void test_sorted_index() throws IOException { doc.add(new NumericDocValuesField(sortFieldName, i)); w.addDocument(doc); } - log.info("Flushing docs to make them discoverable on the file system"); + // Flushing docs to make them discoverable on the file system w.commit(); try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have a single segment with 10 documents"); + // We should now have a single segment with 10 documents Assert.assertEquals(1, reader.getContext().leaves().size()); Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); @@ -260,10 +250,8 @@ public void test_sorted_index() throws IOException { 
VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 2.0f}), topDocs.scoreDocs[2].score, 0.001f); - log.info("successfully completed search tests"); } } - log.info("successfully closed directory"); } /** @@ -280,7 +268,6 @@ public void testJVectorKnnIndex_multipleSegments() throws IOException { indexWriterConfig.setCodec(getCodec()); indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(false)); final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { final float[] target = new float[] {0.0f, 0.0f}; @@ -291,10 +278,9 @@ public void testJVectorKnnIndex_multipleSegments() throws IOException { w.addDocument(doc); w.commit(); // this creates a new segment } - log.info("Done writing all files to the file system"); try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have 10 segments, each with a single document"); + // We should now have 10 segments, each with a single document Assert.assertEquals(10, reader.getContext().leaves().size()); Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); final Query filterQuery = new MatchAllDocsQuery(); @@ -318,7 +304,6 @@ public void testJVectorKnnIndex_multipleSegments() throws IOException { VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 1.0f / 8.0f}), topDocs.scoreDocs[2].score, 0.001f); - log.info("successfully completed search tests"); } } } @@ -338,7 +323,6 @@ public void testJVectorKnnIndex_mergeEnabled() throws IOException { indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { final float[] target = new float[] {0.0f, 0.0f}; @@ -350,12 +334,10 @@ public void testJVectorKnnIndex_mergeEnabled() throws IOException { w.addDocument(doc); w.commit(); // this creates a new segment without triggering a merge } - log.info("Done writing all files to the file system"); w.forceMerge(1); // this merges all segments into a single segment - log.info("Done merging all segments"); try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have 1 segment with 10 documents"); + // We should now have 1 segment with 10 documents Assert.assertEquals(1, reader.getContext().leaves().size()); Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); final Query filterQuery = new MatchAllDocsQuery(); @@ -382,7 +364,6 @@ public void testJVectorKnnIndex_mergeEnabled() throws IOException { VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 3.0f}), topDocs.scoreDocs[2].score, 0.001f); - log.info("successfully completed search tests"); } } } @@ -403,7 +384,6 @@ public void multipleMerges() throws IOException { indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); final Path indexPath = createTempDir(); final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; - log.info("Index path: {}", indexPath); try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { final float[] target = new float[] {0.0f, 0.0f}; @@ -416,12 +396,10 @@ public void multipleMerges() throws IOException { w.commit(); // this creates a new segment without triggering a merge w.forceMerge(1); // 
this merges all segments into a single segment } - log.info("Done writing all files to the file system"); w.forceMerge(1); // this merges all segments into a single segment - log.info("Done merging all segments"); try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have 1 segment with 10 documents"); + // We should now have 1 segment with 10 documents Assert.assertEquals(1, reader.getContext().leaves().size()); Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); final Query filterQuery = new MatchAllDocsQuery(); @@ -448,7 +426,6 @@ public void multipleMerges() throws IOException { VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 3.0f}), topDocs.scoreDocs[2].score, 0.001f); - log.info("successfully completed search tests"); } } } @@ -480,7 +457,6 @@ public void testJVectorKnnIndex_multiple_merges_large_batches_no_quantization() 1000); // 1000MB per thread, this way we make sure that no premature flush will occur final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { final float[] target = new float[] {0.0f, 0.0f}; @@ -494,12 +470,10 @@ public void testJVectorKnnIndex_multiple_merges_large_batches_no_quantization() w.commit(); // this creates a new segment without triggering a merge } } - log.info("Done writing all files to the file system"); w.forceMerge(1); // this merges all segments into a single segment - log.info("Done merging all segments"); try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have 1 segment with {} documents", totalNumberOfDocs); + // We should now have 1 segment with totalNumberOfDocs documents Assert.assertEquals(1, reader.getContext().leaves().size()); Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); final Query filterQuery = new MatchAllDocsQuery(); @@ -513,8 +487,6 @@ public void testJVectorKnnIndex_multiple_merges_large_batches_no_quantization() VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, k}); final float recall = calculateRecall(topDocs, expectedMinScoreInTopK); Assert.assertEquals(1.0f, recall, 0.01f); - - log.info("successfully completed search tests"); } } } @@ -555,7 +527,6 @@ public void testJVectorKnnIndex_multiple_merges_large_batches_no_quantization() 1000); // 1000MB per thread, this way we make sure that no premature flush will occur final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { for (int i = 0; i < source.length; i++) { @@ -567,12 +538,10 @@ public void testJVectorKnnIndex_multiple_merges_large_batches_no_quantization() w.commit(); // this creates a new segment without triggering a merge } } - log.info("Done writing all files to the file system"); w.forceMerge(1); // this merges all segments into a single segment - log.info("Done merging all segments"); try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have a single segment with {} documents", totalNumberOfDocs); + // We should now have a single segment with totalNumberOfDocs documents Assert.assertEquals(1, reader.getContext().leaves().size()); Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); @@ -584,7 +553,6 @@ public void testJVectorKnnIndex_multiple_merges_large_batches_no_quantization() assertEquals(k, topDocs.totalHits.value()); final float recall = 
calculateRecall(reader, groundTruthVectorsIds, topDocs, k); Assert.assertEquals(1.0f, recall, 0.05f); - log.info("successfully completed search tests"); } } } @@ -669,18 +637,6 @@ public void testLuceneKnnIndex_multipleMerges_with_ordering_check() .getIntValue(); float[] vectorValue = vectorValues.vectorValue(docIdSetIterator.index()); float[] expectedVectorValue = sourceVectors[globalDocId]; - // if the vectors do not match, also look which source vector should be the right result - if (!Arrays.equals(expectedVectorValue, vectorValue)) { - for (int i = 0; i < sourceVectors.length; i++) { - if (Arrays.equals(sourceVectors[i], vectorValue)) { - log.error( - "found vector with global id: {}, in docId: {}, however the actual position of the vector in source is: {}", - globalDocId, - luceneDocId, - i); - } - } - } Assert.assertArrayEquals( "vector with global id " + globalDocId @@ -744,13 +700,11 @@ public void testLuceneKnnIndex_multipleMerges_with_ordering_check() totalQueries.incrementAndGet(); } catch (Throwable e) { failureDetected.compareAndSet(false, true); - log.error("Exception encountered", e); fail("Exception during concurrent search: " + e.getMessage()); } } } finally { latch.countDown(); - log.warn("Ran {} queries", i); } }); } @@ -765,9 +719,6 @@ public void testLuceneKnnIndex_multipleMerges_with_ordering_check() numThreads * queriesPerThread, totalQueries.get()); - // Log the number of successful queries - log.info("Successfully completed {} concurrent kNN search queries!", totalQueries.get()); - } finally { executor.shutdownNow(); } @@ -906,7 +857,6 @@ public void testLuceneKnnIndex_mergeEnabled_withCompoundFile() throws IOExceptio indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(true)); indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { final float[] target = new float[] {0.0f, 0.0f}; @@ -917,12 +867,10 @@ public void testLuceneKnnIndex_mergeEnabled_withCompoundFile() throws IOExceptio w.addDocument(doc); w.flush(); // this creates a new segment without triggering a merge } - log.info("Done writing all files to the file system"); w.forceMerge(1); // this merges all segments into a single segment - log.info("Done merging all segments"); try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have 1 segment with 10 documents"); + // We should now have 1 segment with 10 documents Assert.assertEquals(1, reader.getContext().leaves().size()); Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); final Query filterQuery = new MatchAllDocsQuery(); @@ -946,7 +894,6 @@ public void testLuceneKnnIndex_mergeEnabled_withCompoundFile() throws IOExceptio VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 1.0f / 8.0f}), topDocs.scoreDocs[2].score, 0.01f); - log.info("successfully completed search tests"); } } } @@ -967,7 +914,6 @@ public void testLuceneKnnIndex_mergeEnabled_withCompoundFile_cosine() throws IOE indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy(true)); indexWriterConfig.setMergeScheduler(new SerialMergeScheduler()); final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { final float[] target = new float[] {1.0f, 1.0f}; @@ -978,12 +924,10 @@ public void 
testLuceneKnnIndex_mergeEnabled_withCompoundFile_cosine() throws IOE w.addDocument(doc); w.flush(); // this creates a new segment without triggering a merge } - log.info("Done writing all files to the file system"); w.forceMerge(1); // this merges all segments into a single segment - log.info("Done merging all segments"); try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have 1 segment with 10 documents"); + // We should now have 1 segment with 10 documents Assert.assertEquals(1, reader.getContext().leaves().size()); Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); final Query filterQuery = new MatchAllDocsQuery(); @@ -1007,7 +951,6 @@ public void testLuceneKnnIndex_mergeEnabled_withCompoundFile_cosine() throws IOE VectorSimilarityFunction.COSINE.compare(target, new float[] {4.0f, 6.0f}), topDocs.scoreDocs[2].score, 0.001f); - log.info("successfully completed search tests"); } } } @@ -1026,7 +969,6 @@ public void testJVectorKnnIndex_simpleCase_withBinaryVector() throws IOException indexWriterConfig.setCodec(getCodec()); indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); try (Directory dir = newFSDirectory(indexPath); RandomIndexWriter w = new RandomIndexWriter(random(), dir, indexWriterConfig)) { final byte[] source = new byte[] {(byte) 0, (byte) 0}; @@ -1049,7 +991,6 @@ public void testJVectorKnnIndex_withFilter() throws IOException { indexWriterConfig.setCodec(getCodec()); indexWriterConfig.setMergePolicy(new ForceMergesOnlyMergePolicy()); final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); try (Directory dir = newFSDirectory(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { final float[] target = new float[] {0.0f, 0.0f}; @@ -1060,18 +1001,16 @@ public void testJVectorKnnIndex_withFilter() throws IOException { doc.add(new StringField("filter_field", i % 2 == 0 ? 
"even" : "odd", Field.Store.YES)); w.addDocument(doc); } - log.info("Flushing docs to make them discoverable on the file system"); + // Flushing docs to make them discoverable on the file system w.commit(); try (IndexReader reader = DirectoryReader.open(w)) { - log.info("Applying filter to the KNN search"); final Query filterQuery = new TermQuery(new Term("filter_field", "even")); final IndexSearcher searcher = newSearcher(reader); KnnFloatVectorQuery knnFloatVectorQuery = getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); - log.info("Validating filtered KNN results"); assertEquals(k, topDocs.totalHits.value()); assertEquals(9, topDocs.scoreDocs[0].doc); Assert.assertEquals( @@ -1088,7 +1027,6 @@ public void testJVectorKnnIndex_withFilter() throws IOException { VectorSimilarityFunction.EUCLIDEAN.compare(target, new float[] {0.0f, 1.0f / 6.0f}), topDocs.scoreDocs[2].score, 0.001f); - log.info("successfully completed filtered search tests"); } } } @@ -1117,7 +1055,6 @@ public void testJVectorKnnIndex_simpleCase_withQuantization() throws IOException indexWriterConfig.setRAMPerThreadHardLimitMB( 1000); // 1000MB per thread, this way we make sure that no premature flush will occur final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { final float[] target = generateZerosVectorWithLastValue(dimension, 0); @@ -1130,11 +1067,11 @@ public void testJVectorKnnIndex_simpleCase_withQuantization() throws IOException doc.add(new IntField(TEST_ID_FIELD, i, Field.Store.YES)); w.addDocument(doc); } - log.info("Flushing docs to make them discoverable on the file system"); + // Flushing docs to make them discoverable on the file system w.commit(); try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have a single segment with {} documents", totalNumberOfDocs); + // We should now have a single segment with totalNumberOfDocs documents Assert.assertEquals(1, reader.getContext().leaves().size()); Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); @@ -1146,7 +1083,6 @@ public void testJVectorKnnIndex_simpleCase_withQuantization() throws IOException assertEquals(k, topDocs.totalHits.value()); final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); Assert.assertEquals(1.0f, recall, 0.05f); - log.info("successfully completed search tests"); } } } @@ -1170,7 +1106,6 @@ public void testJVectorKnnIndex_simpleCase_withQuantization_rerank() throws IOEx indexWriterConfig.setRAMPerThreadHardLimitMB( 1000); // 1000MB per thread, this way we make sure that no premature flush will occur final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { final float[] target = generateZerosVectorWithLastValue(dimension, 0); @@ -1180,11 +1115,11 @@ public void testJVectorKnnIndex_simpleCase_withQuantization_rerank() throws IOEx doc.add(new KnnFloatVectorField("test_field", source, VectorSimilarityFunction.EUCLIDEAN)); w.addDocument(doc); } - log.info("Flushing docs to make them discoverable on the file system"); + // Flushing docs to make them discoverable on the file system w.commit(); try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have a single segment with {} documents", totalNumberOfDocs); + // We should 
now have a single segment with totalNumberOfDocs documents Assert.assertEquals(1, reader.getContext().leaves().size()); Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); @@ -1213,8 +1148,6 @@ public void testJVectorKnnIndex_simpleCase_withQuantization_rerank() throws IOEx assertEquals(k, topDocs.totalHits.value()); float recallWithHighOverqueryFactor = calculateRecall(topDocs, expectedMinScoreInTopK); Assert.assertTrue(recallWithLowOverqueryFactor <= recallWithHighOverqueryFactor); - - log.info("successfully completed search tests"); } } } @@ -1252,7 +1185,6 @@ public void testJVectorKnnIndex_happyCase_withQuantization_multipleSegments() th indexWriterConfig.setRAMPerThreadHardLimitMB( 1000); // 1000MB per thread, this way we make sure that no premature flush will occur final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { final float[] target = generateZerosVectorWithLastValue(dimension, 0); @@ -1269,11 +1201,11 @@ public void testJVectorKnnIndex_happyCase_withQuantization_multipleSegments() th w.commit(); } } - log.info("Flushing docs to make them discoverable on the file system"); + // Flushing docs to make them discoverable on the file system w.forceMerge(1); try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have a single segment with {} documents", totalNumberOfDocs); + // We should now have a single segment with totalNumberOfDocs documents Assert.assertEquals(1, reader.getContext().leaves().size()); Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); @@ -1285,7 +1217,6 @@ public void testJVectorKnnIndex_happyCase_withQuantization_multipleSegments() th assertEquals(k, topDocs.totalHits.value()); final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); Assert.assertEquals(1.0f, recall, 0.05f); - log.info("successfully completed search tests"); } } } @@ -1327,7 +1258,6 @@ public void testJVectorKnnIndex_mixedBatchSizes_withQuantization_multipleMerges( indexWriterConfig.setRAMPerThreadHardLimitMB( 1000); // 1000MB per thread, this way we make sure that no premature flush will occur final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { final float[] target = generateZerosVectorWithLastValue(dimension, 0); @@ -1344,11 +1274,11 @@ public void testJVectorKnnIndex_mixedBatchSizes_withQuantization_multipleMerges( w.commit(); } } - log.info("Flushing docs to make them discoverable on the file system"); + // Flushing docs to make them discoverable on the file system w.forceMerge(1); try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have a single segment with {} documents", totalNumberOfDocs); + // We should now have a single segment with totalNumberOfDocs documents Assert.assertEquals(1, reader.getContext().leaves().size()); Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); @@ -1360,7 +1290,6 @@ public void testJVectorKnnIndex_mixedBatchSizes_withQuantization_multipleMerges( assertEquals(k, topDocs.totalHits.value()); final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); Assert.assertEquals(1.0f, recall, 0.05f); - log.info("successfully completed search tests"); } } } @@ -1404,7 +1333,6 @@ public void testJVectorKnnIndex_mixedBatchSizes_withQuantization_multipleMerges_ 
indexWriterConfig.setRAMPerThreadHardLimitMB( 1000); // 1000MB per thread, this way we make sure that no premature flush will occur final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { final float[] target = generateZerosVectorWithLastValue(dimension, 0); @@ -1424,11 +1352,11 @@ public void testJVectorKnnIndex_mixedBatchSizes_withQuantization_multipleMerges_ } } w.commit(); - log.info("Flushing docs to make them discoverable on the file system"); + // Flushing docs to make them discoverable on the file system w.forceMerge(1); try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have a single segment with {} documents", totalNumberOfDocs); + // We should now have a single segment with totalNumberOfDocs documents Assert.assertEquals(1, reader.getContext().leaves().size()); Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); @@ -1441,7 +1369,6 @@ public void testJVectorKnnIndex_mixedBatchSizes_withQuantization_multipleMerges_ final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); Assert.assertEquals( "Expected to have recall of 1.0+/-0.05 but got " + recall, 1.0f, recall, 0.05f); - log.info("successfully completed search tests"); } } @@ -1485,7 +1412,6 @@ public void testJVectorKnnIndex_withQuantization_withCompoundFile_with_refinemen indexWriterConfig.setRAMPerThreadHardLimitMB( 1000); // 1000MB per thread, this way we make sure that no premature flush will occur final Path indexPath = createTempDir(); - log.info("Index path: {}", indexPath); try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { final float[] target = generateZerosVectorWithLastValue(dimension, 0); @@ -1514,11 +1440,11 @@ public void testJVectorKnnIndex_withQuantization_withCompoundFile_with_refinemen } } w.commit(); - log.info("Flushing docs to make them discoverable on the file system"); + // Flushing docs to make them discoverable on the file system w.forceMerge(1); try (IndexReader reader = DirectoryReader.open(w)) { - log.info("We should now have a single segment with {} documents", totalNumberOfDocs); + // We should now have a single segment with totalNumberOfDocs documents Assert.assertEquals(1, reader.getContext().leaves().size()); Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); @@ -1531,7 +1457,6 @@ public void testJVectorKnnIndex_withQuantization_withCompoundFile_with_refinemen final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); Assert.assertEquals( "Expected to have recall of 1.0+/-0.05 but got " + recall, 1.0f, recall, 0.05f); - log.info("successfully completed search tests"); } } @@ -1555,31 +1480,9 @@ private float calculateRecall(TopDocs topDocs, float minScoreInTopK) { } } float recall = ((float) totalRelevantDocs) / ((float) topDocs.scoreDocs.length); - - if (recall == 0.0f) { - log.info( - "Recall is 0.0, this is probably not correct, here is some debug information\n topDocs: {}, minScoreInTopK: {}, totalRelevantDocs: {}", - topDocsToString(topDocs), - minScoreInTopK, - totalRelevantDocs); - } return recall; } - // convert topDocs to a pretty printed string - private String topDocsToString(TopDocs topDocs) { - StringBuilder sb = new StringBuilder(); - sb.append("TopDocs: ["); - for (int i = 0; i < topDocs.scoreDocs.length; i++) { - sb.append(topDocs.scoreDocs[i].doc) - .append(" (") - .append(topDocs.scoreDocs[i].score) 
- .append("), "); - } - sb.append("]"); - return sb.toString(); - } - private JVectorKnnFloatVectorQuery getJVectorKnnFloatVectorQuery( String fieldName, float[] target, int k, Query filterQuery) { return getJVectorKnnFloatVectorQuery( From 2e2f5640adaa8985c0782866d6c4cd67810e07d5 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 16:31:11 +0000 Subject: [PATCH 07/86] Remove KNNCounter stats --- .../sandbox/codecs/jvector/JVectorReader.java | 18 ----------------- .../sandbox/codecs/jvector/JVectorWriter.java | 14 ------------- .../codecs/jvector/KNNJVectorTests.java | 20 +++---------------- 3 files changed, 3 insertions(+), 49 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 8110937aec99..6ff34b02c2ae 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -45,7 +45,6 @@ import org.apache.lucene.util.Bits; import org.apache.lucene.util.IOUtils; import org.opensearch.knn.common.KNNConstants; -import org.opensearch.knn.plugin.stats.KNNCounter; public class JVectorReader extends KnnVectorsReader { private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = @@ -160,7 +159,6 @@ public void search(String field, float[] target, KnnCollector knnCollector, Bits final SearchScoreProvider ssp; try (var view = index.getView()) { - final long graphSearchStart = System.currentTimeMillis(); if (fieldEntryMap.get(field).pqVectors != null) { // Quantized, use the precomputed score function final PQVectors pqVectors = fieldEntryMap.get(field).pqVectors; @@ -195,22 +193,6 @@ public void search(String field, float[] target, KnnCollector knnCollector, Bits for (SearchResult.NodeScore ns : searchResults.getNodes()) { jvectorKnnCollector.collect(jvectorLuceneDocMap.getLuceneDocId(ns.node), ns.score); } - final long graphSearchEnd = System.currentTimeMillis(); - final long searchTime = graphSearchEnd - graphSearchStart; - - // Collect the below metrics about the search and somehow wire this back to {@link - // @KNNStats} - final int visitedNodesCount = searchResults.getVisitedCount(); - final int rerankedCount = searchResults.getRerankedCount(); - - final int expandedCount = searchResults.getExpandedCount(); - final int expandedBaseLayerCount = searchResults.getExpandedCountBaseLayer(); - - KNNCounter.KNN_QUERY_VISITED_NODES.add(visitedNodesCount); - KNNCounter.KNN_QUERY_RERANKED_COUNT.add(rerankedCount); - KNNCounter.KNN_QUERY_EXPANDED_NODES.add(expandedCount); - KNNCounter.KNN_QUERY_EXPANDED_BASE_LAYER_NODES.add(expandedBaseLayerCount); - KNNCounter.KNN_QUERY_GRAPH_SEARCH_TIME.add(searchTime); } } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 536f5f49517f..96eae3227ca0 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -36,7 +36,6 @@ import io.github.jbellis.jvector.vector.types.VectorTypeSupport; import java.io.IOException; import java.io.UnsupportedEncodingException; -import java.time.Clock; import java.util.*; import java.util.concurrent.ForkJoinPool; import java.util.function.Function; @@ -56,7 +55,6 @@ import 
org.apache.lucene.util.Bits; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.RamUsageEstimator; -import org.opensearch.knn.plugin.stats.KNNCounter; /** * JVectorWriter is responsible for writing vector data into index segments using the JVector @@ -192,7 +190,6 @@ public KnnFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOException @Override public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { try { - final long mergeStart = Clock.systemDefaultZone().millis(); switch (fieldInfo.getVectorEncoding()) { case BYTE: throw new UnsupportedEncodingException("Byte vectors are not supported in JVector."); @@ -201,9 +198,6 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE mergeRavv.merge(); break; } - final long mergeEnd = Clock.systemDefaultZone().millis(); - final long mergeTime = mergeEnd - mergeStart; - KNNCounter.KNN_GRAPH_MERGE_TIME.add(mergeTime); } catch (Exception e) { throw e; } @@ -382,7 +376,6 @@ private PQVectors getPQVectors( throws IOException { final VectorSimilarityFunction vectorSimilarityFunction = fieldInfo.getVectorSimilarityFunction(); - final long start = Clock.systemDefaultZone().millis(); final var M = numberOfSubspacesPerVectorSupplier.apply(randomAccessVectorValues.dimension()); final var numberOfClustersPerSubspace = Math.min(256, randomAccessVectorValues.size()); // number of centroids per @@ -397,9 +390,6 @@ private PQVectors getPQVectors( SIMD_POOL_MERGE, ForkJoinPool.commonPool()); - final long end = Clock.systemDefaultZone().millis(); - final long trainingTime = end - start; - KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.add(trainingTime); // PQVectors pqVectors = pq.encodeAll(randomAccessVectorValues, SIMD_POOL); PQVectors pqVectors = PQVectors.encodeAndBuild( @@ -865,7 +855,6 @@ public void merge() throws IOException { pqVectors = null; } } else { - final long start = Clock.systemDefaultZone().millis(); ProductQuantization leadingCompressor = leadingReader.getProductQuantizationForField(fieldName).get(); // Refine the leadingCompressor with the remaining vectors in the merge, we skip the leading @@ -878,9 +867,6 @@ public void merge() throws IOException { new RandomAccessVectorValuesOverVectorValues(values); leadingCompressor.refine(randomAccessVectorValues); } - final long end = Clock.systemDefaultZone().millis(); - final long trainingTime = end - start; - KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.add(trainingTime); pqVectors = PQVectors.encodeAndBuild( leadingCompressor, diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java index e07f6519a18a..636279e43693 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java @@ -39,7 +39,6 @@ import org.opensearch.knn.TestUtils; import org.opensearch.knn.common.KNNConstants; import org.opensearch.knn.index.ThreadLeakFiltersForTests; -import org.opensearch.knn.plugin.stats.KNNCounter; /** Test used specifically for JVector */ // Currently {@link IndexGraphBuilder} is using the default ForkJoinPool.commonPool() which is not @@ -1371,11 +1370,8 @@ public void testJVectorKnnIndex_mixedBatchSizes_withQuantization_multipleMerges_ "Expected to have recall of 1.0+/-0.05 but got " + recall, 1.0f, recall, 0.05f); } } - - Assert.assertTrue( - "No quantization time recorded", 
KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.getCount() > 0); - Assert.assertTrue( - "No graph merge time recorded", KNNCounter.KNN_GRAPH_MERGE_TIME.getCount() > 0); + // TODO: assert no quantization + // TODO: assert no graph merge } /** @@ -1427,16 +1423,8 @@ public void testJVectorKnnIndex_withQuantization_withCompoundFile_with_refinemen doc.add(new KnnFloatVectorField(TEST_FIELD, source, vectorSimilarityFunction)); w.addDocument(doc); if (i % idealBatchSize == 0) { - final long beforeTrainingTime = KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.getCount(); w.commit(); w.forceMerge(1); // force merge will trigger PQ refinement if other segments are present - final long afterTrainingTime = KNNCounter.KNN_QUANTIZATION_TRAINING_TIME.getCount(); - Assert.assertTrue( - "Expected to have a training time of at least " - + beforeTrainingTime - + " but got " - + afterTrainingTime, - afterTrainingTime >= beforeTrainingTime); } } w.commit(); @@ -1459,9 +1447,7 @@ public void testJVectorKnnIndex_withQuantization_withCompoundFile_with_refinemen "Expected to have recall of 1.0+/-0.05 but got " + recall, 1.0f, recall, 0.05f); } } - - Assert.assertTrue( - "No graph merge time recorded", KNNCounter.KNN_GRAPH_MERGE_TIME.getCount() > 0); + // TODO: Assert no graph merge } /** From a47ca910c84b0d6cd16286167717eecd7a149b76 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 16:38:30 +0000 Subject: [PATCH 08/86] Fix references to missing KNNConstants --- .../sandbox/codecs/jvector/JVectorFormat.java | 19 +++++++++++-------- .../sandbox/codecs/jvector/JVectorReader.java | 14 +++++++++----- .../codecs/jvector/KNNJVectorTests.java | 11 +++++------ 3 files changed, 25 insertions(+), 19 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java index 020a82835d60..1d208d31415f 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java @@ -26,7 +26,6 @@ import org.apache.lucene.codecs.KnnVectorsWriter; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; -import org.opensearch.knn.common.KNNConstants; public class JVectorFormat extends KnnVectorsFormat { public static final String NAME = "JVectorFormat"; @@ -44,6 +43,10 @@ public class JVectorFormat extends KnnVectorsFormat { public static final int VERSION_CURRENT = VERSION_START; public static final int DEFAULT_MAX_CONN = 32; public static final int DEFAULT_BEAM_WIDTH = 100; + public static final int DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION = 1024; + public static final float DEFAULT_NEIGHBOR_OVERFLOW = 2f; + public static final float DEFAULT_ALPHA = 2f; + public static final boolean DEFAULT_HIERARCHY_ENABLED = true; // Unfortunately, this can't be managed yet by the OpenSearch ThreadPool because it's not // supporting {@link ForkJoinPool} types public static final ForkJoinPool SIMD_POOL_MERGE = getPhysicalCoreExecutor(); @@ -63,11 +66,11 @@ public JVectorFormat() { NAME, DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, - KNNConstants.DEFAULT_NEIGHBOR_OVERFLOW_VALUE.floatValue(), - KNNConstants.DEFAULT_ALPHA_VALUE.floatValue(), + DEFAULT_NEIGHBOR_OVERFLOW, + DEFAULT_ALPHA, JVectorFormat::getDefaultNumberOfSubspacesPerVector, - KNNConstants.DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION, - KNNConstants.DEFAULT_HIERARCHY_ENABLED); + 
DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION, + DEFAULT_HIERARCHY_ENABLED); } public JVectorFormat(int minBatchSizeForQuantization) { @@ -75,11 +78,11 @@ public JVectorFormat(int minBatchSizeForQuantization) { NAME, DEFAULT_MAX_CONN, DEFAULT_BEAM_WIDTH, - KNNConstants.DEFAULT_NEIGHBOR_OVERFLOW_VALUE.floatValue(), - KNNConstants.DEFAULT_ALPHA_VALUE.floatValue(), + DEFAULT_NEIGHBOR_OVERFLOW, + DEFAULT_ALPHA, JVectorFormat::getDefaultNumberOfSubspacesPerVector, minBatchSizeForQuantization, - KNNConstants.DEFAULT_HIERARCHY_ENABLED); + DEFAULT_HIERARCHY_ENABLED); } public JVectorFormat( diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 6ff34b02c2ae..a5d07b8ba63a 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -44,9 +44,13 @@ import org.apache.lucene.store.*; import org.apache.lucene.util.Bits; import org.apache.lucene.util.IOUtils; -import org.opensearch.knn.common.KNNConstants; public class JVectorReader extends KnnVectorsReader { + public static final float DEFAULT_QUERY_SIMILARITY_THRESHOLD = 0f; + public static final float DEFAULT_QUERY_RERANK_FLOOR = 0f; + public static final int DEFAULT_OVER_QUERY_FACTOR = 3; + public static final boolean DEFAULT_QUERY_USE_PRUNING = false; + private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport(); @@ -148,10 +152,10 @@ public void search(String field, float[] target, KnnCollector knnCollector, Bits jvectorKnnCollector = new JVectorKnnCollector( knnCollector, - KNNConstants.DEFAULT_QUERY_SIMILARITY_THRESHOLD.floatValue(), - KNNConstants.DEFAULT_QUERY_RERANK_FLOOR.floatValue(), - KNNConstants.DEFAULT_OVER_QUERY_FACTOR, - KNNConstants.DEFAULT_QUERY_USE_PRUNING); + DEFAULT_QUERY_SIMILARITY_THRESHOLD, + DEFAULT_QUERY_RERANK_FLOOR, + DEFAULT_OVER_QUERY_FACTOR, + DEFAULT_QUERY_USE_PRUNING); } // search for a random vector using a GraphSearcher and SearchScoreProvider diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java index 636279e43693..de9554f6bcff 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java @@ -17,7 +17,7 @@ package org.apache.lucene.sandbox.codecs.jvector; -import static org.opensearch.knn.common.KNNConstants.DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; +import static org.apache.lucene.sandbox.codecs.jvector.JVectorFormat.DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; import static org.opensearch.knn.index.engine.CommonTestUtils.getCodec; import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; @@ -37,7 +37,6 @@ import org.junit.Assert; import org.junit.Test; import org.opensearch.knn.TestUtils; -import org.opensearch.knn.common.KNNConstants; import org.opensearch.knn.index.ThreadLeakFiltersForTests; /** Test used specifically for JVector */ @@ -1472,7 +1471,7 @@ private float calculateRecall(TopDocs topDocs, float minScoreInTopK) { private JVectorKnnFloatVectorQuery getJVectorKnnFloatVectorQuery( String fieldName, float[] target, int k, Query filterQuery) { return getJVectorKnnFloatVectorQuery( - fieldName, target, k, 
filterQuery, KNNConstants.DEFAULT_OVER_QUERY_FACTOR); + fieldName, target, k, filterQuery, JVectorReader.DEFAULT_OVER_QUERY_FACTOR); } private JVectorKnnFloatVectorQuery getJVectorKnnFloatVectorQuery( @@ -1483,9 +1482,9 @@ private JVectorKnnFloatVectorQuery getJVectorKnnFloatVectorQuery( k, filterQuery, overQueryFactor, - KNNConstants.DEFAULT_QUERY_SIMILARITY_THRESHOLD.floatValue(), - KNNConstants.DEFAULT_QUERY_RERANK_FLOOR.floatValue(), - KNNConstants.DEFAULT_QUERY_USE_PRUNING); + JVectorReader.DEFAULT_QUERY_SIMILARITY_THRESHOLD, + JVectorReader.DEFAULT_QUERY_RERANK_FLOOR, + JVectorReader.DEFAULT_QUERY_USE_PRUNING); } private static float[][] getMonotonicallyIncreasingVectors(int numVectors, int vectorDimension) { From c30a0702be9721552ae56cf9f2fb8bc6295fd386 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 16:41:39 +0000 Subject: [PATCH 09/86] Remove lombok.Value annotation from JVectorKnnCollector --- .../codecs/jvector/JVectorKnnCollector.java | 25 +++++++++++++------ .../sandbox/codecs/jvector/JVectorReader.java | 6 ++--- 2 files changed, 21 insertions(+), 10 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java index 8051e967e884..d2fad6532570 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java @@ -16,7 +16,6 @@ */ package org.apache.lucene.sandbox.codecs.jvector; -import lombok.Value; import org.apache.lucene.search.KnnCollector; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.knn.KnnSearchStrategy; @@ -25,13 +24,25 @@ * Wrapper class for KnnCollector that provides passing of additional parameters specific for * JVector. 
*/ -@Value public class JVectorKnnCollector implements KnnCollector { - KnnCollector delegate; - float threshold; - float rerankFloor; - int overQueryFactor; - boolean usePruning; + final KnnCollector delegate; + final float threshold; + final float rerankFloor; + final int overQueryFactor; + final boolean usePruning; + + public JVectorKnnCollector( + KnnCollector delegate, + float threshold, + float rerankFloor, + int overQueryFactor, + boolean usePruning) { + this.delegate = delegate; + this.threshold = threshold; + this.rerankFloor = rerankFloor; + this.overQueryFactor = overQueryFactor; + this.usePruning = usePruning; + } @Override public boolean earlyTerminated() { diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index a5d07b8ba63a..698258744509 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -190,9 +190,9 @@ public void search(String field, float[] target, KnnCollector knnCollector, Bits graphSearcher.search( ssp, jvectorKnnCollector.k(), - jvectorKnnCollector.k() * jvectorKnnCollector.getOverQueryFactor(), - jvectorKnnCollector.getThreshold(), - jvectorKnnCollector.getRerankFloor(), + jvectorKnnCollector.k() * jvectorKnnCollector.overQueryFactor, + jvectorKnnCollector.threshold, + jvectorKnnCollector.rerankFloor, compatibleBits); for (SearchResult.NodeScore ns : searchResults.getNodes()) { jvectorKnnCollector.collect(jvectorLuceneDocMap.getLuceneDocId(ns.node), ns.score); From d9e5ba316435f576e510f27507f6e573ce90a663 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 16:42:32 +0000 Subject: [PATCH 10/86] Fix AcceptDocs param in JVectorKnnFloatVectorQuery --- .../sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java index f8903d67bde5..50246250ad60 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java @@ -23,7 +23,6 @@ import org.apache.lucene.search.*; import org.apache.lucene.search.knn.KnnCollectorManager; import org.apache.lucene.search.knn.KnnSearchStrategy; -import org.apache.lucene.util.Bits; /** * {@link KnnFloatVectorQuery} that uses jVector to perform the search. 
We use this wrapper simply @@ -70,7 +69,7 @@ public JVectorKnnFloatVectorQuery( @Override protected TopDocs approximateSearch( LeafReaderContext context, - Bits acceptDocs, + AcceptDocs acceptDocs, int visitedLimit, KnnCollectorManager knnCollectorManager) throws IOException { From 6b178d8fcc27c1dfe36d535d67348753cad44f31 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 16:55:50 +0000 Subject: [PATCH 11/86] Fix AcceptDocs param in JVectorReader --- .../sandbox/codecs/jvector/JVectorReader.java | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 698258744509..35482cd91dbe 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -27,6 +27,7 @@ import io.github.jbellis.jvector.graph.similarity.SearchScoreProvider; import io.github.jbellis.jvector.quantization.PQVectors; import io.github.jbellis.jvector.quantization.ProductQuantization; +import io.github.jbellis.jvector.util.Bits; import io.github.jbellis.jvector.vector.VectorSimilarityFunction; import io.github.jbellis.jvector.vector.VectorizationProvider; import io.github.jbellis.jvector.vector.types.VectorFloat; @@ -40,9 +41,9 @@ import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.index.*; +import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.KnnCollector; import org.apache.lucene.store.*; -import org.apache.lucene.util.Bits; import org.apache.lucene.util.IOUtils; public class JVectorReader extends KnnVectorsReader { @@ -140,7 +141,7 @@ public OnDiskGraphIndex getOnDiskGraphIndex(String field) throws IOException { } @Override - public void search(String field, float[] target, KnnCollector knnCollector, Bits acceptDocs) + public void search(String field, float[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { final OnDiskGraphIndex index = fieldEntryMap.get(field).index; final JVectorKnnCollector jvectorKnnCollector; @@ -182,8 +183,14 @@ public void search(String field, float[] target, KnnCollector knnCollector, Bits // Logic works as follows: if acceptDocs is null, we accept all ordinals. Otherwise, we check // if the jVector ordinal has a // corresponding Lucene doc ID accepted by acceptDocs filter. 
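The comment above captures the core of the AcceptDocs change: jVector's graph search visits node ordinals, so a Lucene doc-level filter can only be applied by translating each ordinal back to its Lucene doc ID. A minimal sketch of that bridge follows; the names docIdForOrdinal and acceptedDocs are illustrative placeholders, not APIs from this patch.

import java.util.function.IntPredicate;
import java.util.function.IntUnaryOperator;

// Sketch only: adapt a doc-id level filter into a predicate over graph node ordinals.
final class OrdinalFilterSketch {
  // A null doc filter means "no filtering": every ordinal is searchable.
  static IntPredicate forOrdinals(IntUnaryOperator docIdForOrdinal, IntPredicate acceptedDocs) {
    if (acceptedDocs == null) {
      return ord -> true;
    }
    return ord -> acceptedDocs.test(docIdForOrdinal.applyAsInt(ord));
  }
}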
- io.github.jbellis.jvector.util.Bits compatibleBits = - ord -> acceptDocs == null || acceptDocs.get(jvectorLuceneDocMap.getLuceneDocId(ord)); + + Bits compatibleBits = Bits.ALL; + if (acceptDocs != null) { + final var luceneBits = acceptDocs.bits(); + if (luceneBits != null) { + compatibleBits = ord -> luceneBits.get(jvectorLuceneDocMap.getLuceneDocId(ord)); + } + } try (var graphSearcher = new GraphSearcher(index)) { final var searchResults = @@ -202,7 +209,7 @@ public void search(String field, float[] target, KnnCollector knnCollector, Bits } @Override - public void search(String field, byte[] target, KnnCollector knnCollector, Bits acceptDocs) + public void search(String field, byte[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { // TODO: implement this throw new UnsupportedOperationException("Byte vector search is not supported yet with jVector"); From 9604146db48484c4dfa214e00c724c444f66ea25 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 16:57:59 +0000 Subject: [PATCH 12/86] Fix static imports of SIMD_POOL* --- .../apache/lucene/sandbox/codecs/jvector/JVectorWriter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 96eae3227ca0..740d6459cc5c 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -19,8 +19,8 @@ import static io.github.jbellis.jvector.quantization.KMeansPlusPlusClusterer.UNWEIGHTED; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding; -import static org.opensearch.knn.index.codec.jvector.JVectorFormat.SIMD_POOL_FLUSH; -import static org.opensearch.knn.index.codec.jvector.JVectorFormat.SIMD_POOL_MERGE; +import static org.apache.lucene.sandbox.codecs.jvector.JVectorFormat.SIMD_POOL_FLUSH; +import static org.apache.lucene.sandbox.codecs.jvector.JVectorFormat.SIMD_POOL_MERGE; import io.github.jbellis.jvector.disk.RandomAccessReader; import io.github.jbellis.jvector.graph.*; From caba8d9f33a84c0cf8f102ca6351094d12216af9 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 17:07:38 +0000 Subject: [PATCH 13/86] Remove Lombok.Getter from JVectorWriter --- .../apache/lucene/sandbox/codecs/jvector/JVectorWriter.java | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 740d6459cc5c..9b9e8e7cfdb9 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -42,7 +42,6 @@ import java.util.stream.IntStream; import lombok.AllArgsConstructor; import lombok.Builder; -import lombok.Getter; import lombok.Value; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnFieldVectorsWriter; @@ -485,7 +484,7 @@ static class FieldWriter extends KnnFieldVectorsWriter { private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport(); private final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(FieldWriter.class); - @Getter private final FieldInfo fieldInfo; 
+ private final FieldInfo fieldInfo; private int lastDocID = -1; private final RandomAccessVectorValues randomAccessVectorValues; // The ordering of docIds matches the ordering of vectors, the index in this list corresponds to From 0fcac22fea2511a1d345d70eafd5f193a7f3f8f0 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 17:09:14 +0000 Subject: [PATCH 14/86] Remove lombok annotations from VectorIndexFieldMetadata --- .../sandbox/codecs/jvector/JVectorReader.java | 18 ++-- .../sandbox/codecs/jvector/JVectorWriter.java | 86 +++++++++++-------- 2 files changed, 61 insertions(+), 43 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 35482cd91dbe..9ac1234fb258 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -228,7 +228,7 @@ private void readFields(ChecksumIndexInput meta) throws IOException { final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber); // read field number JVectorWriter.VectorIndexFieldMetadata vectorIndexFieldMetadata = new JVectorWriter.VectorIndexFieldMetadata(meta); - assert fieldInfo.number == vectorIndexFieldMetadata.getFieldNumber(); + assert fieldInfo.number == vectorIndexFieldMetadata.fieldNumber; fieldEntryMap.put(fieldInfo.name, new FieldEntry(fieldInfo, vectorIndexFieldMetadata)); } } @@ -256,14 +256,14 @@ public FieldEntry( throws IOException { this.similarityFunction = VectorSimilarityMapper.ordToDistFunc( - vectorIndexFieldMetadata.getVectorSimilarityFunction().ordinal()); - this.vectorEncoding = vectorIndexFieldMetadata.getVectorEncoding(); - this.vectorIndexOffset = vectorIndexFieldMetadata.getVectorIndexOffset(); - this.vectorIndexLength = vectorIndexFieldMetadata.getVectorIndexLength(); - this.pqCodebooksAndVectorsLength = vectorIndexFieldMetadata.getPqCodebooksAndVectorsLength(); - this.pqCodebooksAndVectorsOffset = vectorIndexFieldMetadata.getPqCodebooksAndVectorsOffset(); - this.dimension = vectorIndexFieldMetadata.getVectorDimension(); - this.graphNodeIdToDocMap = vectorIndexFieldMetadata.getGraphNodeIdToDocMap(); + vectorIndexFieldMetadata.vectorSimilarityFunction.ordinal()); + this.vectorEncoding = vectorIndexFieldMetadata.vectorEncoding; + this.vectorIndexOffset = vectorIndexFieldMetadata.vectorIndexOffset; + this.vectorIndexLength = vectorIndexFieldMetadata.vectorIndexLength; + this.pqCodebooksAndVectorsLength = vectorIndexFieldMetadata.pqCodebooksAndVectorsLength; + this.pqCodebooksAndVectorsOffset = vectorIndexFieldMetadata.pqCodebooksAndVectorsOffset; + this.dimension = vectorIndexFieldMetadata.vectorDimension; + this.graphNodeIdToDocMap = vectorIndexFieldMetadata.graphNodeIdToDocMap; this.vectorIndexFieldDataFileName = baseDataFileName + "_" + fieldInfo.name + "." 
+ JVectorFormat.VECTOR_INDEX_EXTENSION; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 9b9e8e7cfdb9..8c4679495f91 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -40,9 +40,6 @@ import java.util.concurrent.ForkJoinPool; import java.util.function.Function; import java.util.stream.IntStream; -import lombok.AllArgsConstructor; -import lombok.Builder; -import lombok.Value; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnFieldVectorsWriter; import org.apache.lucene.codecs.KnnVectorsReader; @@ -328,15 +325,6 @@ private VectorIndexFieldMetadata writeGraph( segmentWriteState.segmentInfo.getId(), segmentWriteState.segmentSuffix); final long startOffset = indexOutput.getFilePointer(); - - var resultBuilder = - VectorIndexFieldMetadata.builder() - .fieldNumber(fieldInfo.number) - .vectorEncoding(fieldInfo.getVectorEncoding()) - .vectorSimilarityFunction(fieldInfo.getVectorSimilarityFunction()) - .vectorDimension(randomAccessVectorValues.dimension()) - .graphNodeIdToDocMap(graphNodeIdToDocMap); - try (var writer = new OnDiskSequentialGraphIndexWriter.Builder(graph, jVectorIndexWriter) .with(new InlineVectors(randomAccessVectorValues.dimension())) @@ -348,25 +336,35 @@ private VectorIndexFieldMetadata writeGraph( new InlineVectors.State( randomAccessVectorValues.getVector(newToOldOrds[nodeId]))); writer.write(suppliers); - long endGraphOffset = jVectorIndexWriter.position(); - resultBuilder.vectorIndexOffset(startOffset); - resultBuilder.vectorIndexLength(endGraphOffset - startOffset); + final long endGraphOffset = jVectorIndexWriter.position(); // If PQ is enabled and we have enough vectors, write the PQ codebooks and compressed // vectors + final long pqOffset; + final long pqLength; if (pqVectors != null) { - resultBuilder.pqCodebooksAndVectorsOffset(endGraphOffset); + pqOffset = endGraphOffset; // write the compressed vectors and codebooks to disk pqVectors.write(jVectorIndexWriter); - resultBuilder.pqCodebooksAndVectorsLength(jVectorIndexWriter.position() - endGraphOffset); + pqLength = jVectorIndexWriter.position() - endGraphOffset; } else { - resultBuilder.pqCodebooksAndVectorsOffset(0); - resultBuilder.pqCodebooksAndVectorsLength(0); + pqOffset = 0; + pqLength = 0; } CodecUtil.writeFooter(indexOutput); - } - return resultBuilder.build(); + return new VectorIndexFieldMetadata( + fieldInfo.number, + fieldInfo.getVectorEncoding(), + fieldInfo.getVectorSimilarityFunction(), + randomAccessVectorValues.dimension(), + startOffset, + endGraphOffset - startOffset, + pqOffset, + pqLength, + degreeOverflow, + graphNodeIdToDocMap); + } } } @@ -396,20 +394,40 @@ private PQVectors getPQVectors( return pqVectors; } - @Value - @Builder(toBuilder = true) - @AllArgsConstructor public static class VectorIndexFieldMetadata { - int fieldNumber; - VectorEncoding vectorEncoding; - VectorSimilarityFunction vectorSimilarityFunction; - int vectorDimension; - long vectorIndexOffset; - long vectorIndexLength; - long pqCodebooksAndVectorsOffset; - long pqCodebooksAndVectorsLength; - float degreeOverflow; // important when leveraging cache - GraphNodeIdToDocMap graphNodeIdToDocMap; + final int fieldNumber; + final VectorEncoding vectorEncoding; + final VectorSimilarityFunction vectorSimilarityFunction; + final int 
vectorDimension; + final long vectorIndexOffset; + final long vectorIndexLength; + final long pqCodebooksAndVectorsOffset; + final long pqCodebooksAndVectorsLength; + final float degreeOverflow; // important when leveraging cache + final GraphNodeIdToDocMap graphNodeIdToDocMap; + + public VectorIndexFieldMetadata( + int fieldNumber, + VectorEncoding vectorEncoding, + VectorSimilarityFunction vectorSimilarityFunction, + int vectorDimension, + long vectorIndexOffset, + long vectorIndexLength, + long pqCodebooksAndVectorsOffset, + long pqCodebooksAndVectorsLength, + float degreeOverflow, + GraphNodeIdToDocMap graphNodeIdToDocMap) { + this.fieldNumber = fieldNumber; + this.vectorEncoding = vectorEncoding; + this.vectorSimilarityFunction = vectorSimilarityFunction; + this.vectorDimension = vectorDimension; + this.vectorIndexOffset = vectorIndexOffset; + this.vectorIndexLength = vectorIndexLength; + this.pqCodebooksAndVectorsOffset = pqCodebooksAndVectorsOffset; + this.pqCodebooksAndVectorsLength = pqCodebooksAndVectorsLength; + this.degreeOverflow = degreeOverflow; + this.graphNodeIdToDocMap = graphNodeIdToDocMap; + } public void toOutput(IndexOutput out) throws IOException { out.writeInt(fieldNumber); From 384cde8549ea7e12f02ec4805cfdc07587fc093b Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 17:15:11 +0000 Subject: [PATCH 15/86] Fix illegal access to PerFieldKnnVectorsFormat.FieldReader --- .../apache/lucene/sandbox/codecs/jvector/JVectorWriter.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 8c4679495f91..957d5f2acfa5 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -44,7 +44,6 @@ import org.apache.lucene.codecs.KnnFieldVectorsWriter; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.KnnVectorsWriter; -import org.apache.lucene.codecs.perfield.PerFieldKnnVectorsFormat; import org.apache.lucene.index.*; import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.*; @@ -858,9 +857,8 @@ public void merge() throws IOException { final PQVectors pqVectors; final OnHeapGraphIndex graph; // Get the leading reader - PerFieldKnnVectorsFormat.FieldsReader fieldsReader = - (PerFieldKnnVectorsFormat.FieldsReader) readers[LEADING_READER_IDX]; - JVectorReader leadingReader = (JVectorReader) fieldsReader.getFieldReader(fieldName); + final JVectorReader leadingReader = + (JVectorReader) readers[LEADING_READER_IDX].unwrapReaderForField(fieldName); final BuildScoreProvider buildScoreProvider; // Check if the leading reader has pre-existing PQ codebooks and if so, refine them with the // remaining vectors From 4a5639280d031e5d25eb772265d1cb523684e3b9 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 17:20:47 +0000 Subject: [PATCH 16/86] Fix references to getCodec --- .../sandbox/codecs/jvector/KNNJVectorTests.java | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java index de9554f6bcff..764d99c49274 100644 --- 
a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java @@ -18,7 +18,6 @@ package org.apache.lucene.sandbox.codecs.jvector; import static org.apache.lucene.sandbox.codecs.jvector.JVectorFormat.DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; -import static org.opensearch.knn.index.engine.CommonTestUtils.getCodec; import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; import java.io.IOException; @@ -27,6 +26,7 @@ import java.util.concurrent.*; import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicInteger; +import org.apache.lucene.codecs.Codec; import org.apache.lucene.document.*; import org.apache.lucene.index.*; import org.apache.lucene.search.*; @@ -34,6 +34,7 @@ import org.apache.lucene.store.FSDirectory; import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.tests.util.LuceneTestCase; +import org.apache.lucene.tests.util.TestUtil; import org.junit.Assert; import org.junit.Test; import org.opensearch.knn.TestUtils; @@ -1561,4 +1562,12 @@ private static Set calculateGroundTruthVectorsIds( return groundTruthVectorsIds; } + + private Codec getCodec() { + return getCodec(JVectorFormat.DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION); + } + + private Codec getCodec(final int minimumBatchSizeForQuantization) { + return TestUtil.alwaysKnnVectorsFormat(new JVectorFormat(minimumBatchSizeForQuantization)); + } } From 585e7772c110ab5de85836bfd714eb6c61ed76c8 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 19:34:01 +0000 Subject: [PATCH 17/86] Fix references to TestUtils.generateRandomVectors --- .../codecs/jvector/KNNJVectorTests.java | 32 ++++++++++++------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java index 764d99c49274..f9d3e5f756ae 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java @@ -37,7 +37,6 @@ import org.apache.lucene.tests.util.TestUtil; import org.junit.Assert; import org.junit.Test; -import org.opensearch.knn.TestUtils; import org.opensearch.knn.index.ThreadLeakFiltersForTests; /** Test used specifically for JVector */ @@ -506,8 +505,8 @@ public void testJVectorKnnIndex_multiple_merges_large_batches_no_quantization() int k = 3; // The number of nearest neighbors to gather final int dimension = 2; final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; - final float[] target = TestUtils.generateRandomVectors(1, dimension)[0]; - final float[][] source = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); + final float[] target = generateRandomVectors(1, dimension)[0]; + final float[][] source = generateRandomVectors(totalNumberOfDocs, dimension); final Set groundTruthVectorsIds = calculateGroundTruthVectorsIds(target, source, k, vectorSimilarityFunction); @@ -586,7 +585,7 @@ public void testLuceneKnnIndex_multipleMerges_with_ordering_check() final String floatVectorField = "vec"; final String expectedDocIdField = "expectedDocId"; final Path indexPath = createTempDir(); - final float[][] sourceVectors = TestUtils.generateRandomVectors(numDocs, 2); + final float[][] sourceVectors = 
generateRandomVectors(numDocs, 2); final VectorSimilarityFunction vectorSimilarityFunction = VectorSimilarityFunction.EUCLIDEAN; try (Directory dir = newFSDirectory(indexPath)) { @@ -660,7 +659,7 @@ public void testLuceneKnnIndex_multipleMerges_with_ordering_check() final FloatVectorValues vectorValues = context.reader().getFloatVectorValues("vec"); final int k = 1; for (int i = 0; i < reader.maxDoc(); i++) { - float[] query = TestUtils.generateRandomVectors(1, 2)[0]; + float[] query = generateRandomVectors(1, 2)[0]; TopDocs td = searcher.search( getJVectorKnnFloatVectorQuery("vec", query, k, new MatchAllDocsQuery()), k); @@ -687,7 +686,7 @@ public void testLuceneKnnIndex_multipleMerges_with_ordering_check() try { for (i = 0; i < queriesPerThread && !failureDetected.get(); i++) { - float[] query = TestUtils.generateRandomVectors(1, 2)[0]; + float[] query = generateRandomVectors(1, 2)[0]; try { TopDocs td = searcher.search(new KnnFloatVectorQuery("vec", query, k), k); assertEquals( @@ -1057,7 +1056,7 @@ public void testJVectorKnnIndex_simpleCase_withQuantization() throws IOException try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { final float[] target = generateZerosVectorWithLastValue(dimension, 0); - final float[][] vectors = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); + final float[][] vectors = generateRandomVectors(totalNumberOfDocs, dimension); final Set groundTruthVectorsIds = calculateGroundTruthVectorsIds(target, vectors, k, vectorSimilarityFunction); for (int i = 0; i < vectors.length; i++) { @@ -1187,7 +1186,7 @@ public void testJVectorKnnIndex_happyCase_withQuantization_multipleSegments() th try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { final float[] target = generateZerosVectorWithLastValue(dimension, 0); - final float[][] vectors = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); + final float[][] vectors = generateRandomVectors(totalNumberOfDocs, dimension); final Set groundTruthVectorsIds = calculateGroundTruthVectorsIds(target, vectors, k, vectorSimilarityFunction); @@ -1260,7 +1259,7 @@ public void testJVectorKnnIndex_mixedBatchSizes_withQuantization_multipleMerges( try (FSDirectory dir = FSDirectory.open(indexPath); IndexWriter w = new IndexWriter(dir, indexWriterConfig)) { final float[] target = generateZerosVectorWithLastValue(dimension, 0); - final float[][] vectors = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); + final float[][] vectors = generateRandomVectors(totalNumberOfDocs, dimension); final Set groundTruthVectorsIds = calculateGroundTruthVectorsIds(target, vectors, k, vectorSimilarityFunction); for (int i = 0; i < totalNumberOfDocs; i++) { @@ -1337,7 +1336,7 @@ public void testJVectorKnnIndex_mixedBatchSizes_withQuantization_multipleMerges_ final float[] target = generateZerosVectorWithLastValue(dimension, 0); // We will use random vectors because otherwise PQ will have a correlated subspaces which will // result in a broken linear graph - final float[][] vectors = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); + final float[][] vectors = generateRandomVectors(totalNumberOfDocs, dimension); final Set groundTruthVectorsIds = calculateGroundTruthVectorsIds(target, vectors, k, vectorSimilarityFunction); for (int i = 0; i < totalNumberOfDocs; i++) { @@ -1413,7 +1412,7 @@ public void testJVectorKnnIndex_withQuantization_withCompoundFile_with_refinemen final float[] target = 
generateZerosVectorWithLastValue(dimension, 0); // We will use random vectors because otherwise PQ will have a correlated subspaces which will // result in a broken linear graph - final float[][] vectors = TestUtils.generateRandomVectors(totalNumberOfDocs, dimension); + final float[][] vectors = generateRandomVectors(totalNumberOfDocs, dimension); final Set groundTruthVectorsIds = calculateGroundTruthVectorsIds(target, vectors, k, vectorSimilarityFunction); for (int i = 0; i < totalNumberOfDocs; i++) { @@ -1563,6 +1562,17 @@ private static Set calculateGroundTruthVectorsIds( return groundTruthVectorsIds; } + static float[][] generateRandomVectors(int count, int dimension) { + final var rng = nonAssertingRandom(random()); + final float[][] vectors = new float[count][dimension]; + for (int i = 0; i < vectors.length; ++i) { + for (int j = 0; j < vectors[i].length; ++j) { + vectors[i][j] = rng.nextFloat(); + } + } + return vectors; + } + private Codec getCodec() { return getCodec(JVectorFormat.DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION); } From 8ddcddb19bf624077a94a422be0e205c3ee019a7 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 19:14:03 +0000 Subject: [PATCH 18/86] Fix ThreadLeakFilters in test --- .../sandbox/codecs/jvector/KNNJVectorTests.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java index f9d3e5f756ae..f5093abd9770 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java @@ -19,6 +19,7 @@ import static org.apache.lucene.sandbox.codecs.jvector.JVectorFormat.DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; +import com.carrotsearch.randomizedtesting.ThreadFilter; import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters; import java.io.IOException; import java.nio.file.Path; @@ -37,7 +38,6 @@ import org.apache.lucene.tests.util.TestUtil; import org.junit.Assert; import org.junit.Test; -import org.opensearch.knn.index.ThreadLeakFiltersForTests; /** Test used specifically for JVector */ // Currently {@link IndexGraphBuilder} is using the default ForkJoinPool.commonPool() which is not @@ -47,7 +47,7 @@ // due to leaked thread pool warning. 
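For readers unfamiliar with TestUtil.alwaysKnnVectorsFormat (used by the getCodec helpers introduced a couple of patches earlier), the sketch below shows roughly how such a codec is wired into a test writer. It is a minimal illustration that assumes the imports already present in KNNJVectorTests; it is not an excerpt from the patch.

// Sketch: index a single float vector with the JVector format forced for all fields.
void indexOneVectorSketch(java.nio.file.Path tempDir) throws java.io.IOException {
  IndexWriterConfig cfg = new IndexWriterConfig();
  cfg.setCodec(TestUtil.alwaysKnnVectorsFormat(new JVectorFormat()));
  try (FSDirectory dir = FSDirectory.open(tempDir);
      IndexWriter writer = new IndexWriter(dir, cfg)) {
    Document doc = new Document();
    doc.add(
        new KnnFloatVectorField(
            "test_field", new float[] {0.0f, 1.0f}, VectorSimilarityFunction.EUCLIDEAN));
    writer.addDocument(doc);
    writer.commit(); // flush so the segment (and its JVector graph) reaches the directory
  }
}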
@ThreadLeakFilters( defaultFilters = true, - filters = {ThreadLeakFiltersForTests.class}) + filters = {KNNJVectorTests.ThreadLeakFilter.class}) public class KNNJVectorTests extends LuceneTestCase { private static final String TEST_FIELD = "test_field"; private static final String TEST_ID_FIELD = "id"; @@ -1580,4 +1580,11 @@ private Codec getCodec() { private Codec getCodec(final int minimumBatchSizeForQuantization) { return TestUtil.alwaysKnnVectorsFormat(new JVectorFormat(minimumBatchSizeForQuantization)); } + + public static class ThreadLeakFilter implements ThreadFilter { + @Override + public boolean reject(Thread thread) { + return thread.getName().contains("ForkJoinPool"); + } + } } From 25cd540a7504df6789831a1da196e3542e3ba6c2 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 17:31:59 +0000 Subject: [PATCH 19/86] Fix missing @Override --- .../lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java | 1 + .../sandbox/codecs/jvector/JVectorRandomAccessReader.java | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java index c4039c6d12b9..bc34d9141463 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java @@ -64,6 +64,7 @@ public VectorFloat vectorFloatValue(int ord) { return view.getVector(ord); } + @Override public DocIndexIterator iterator() { return new DocIndexIterator() { private int docId = -1; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java index 97f7cec66dec..6c1519a3b04e 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java @@ -59,7 +59,7 @@ public float readFloat() throws IOException { } // TODO: bring back to override when upgrading jVector again - // @Override + @Override public long readLong() throws IOException { return indexInputDelegate.readLong(); } From ee5ed2d71399ed08a3934cf470a4864ba38fdaae Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 17:42:24 +0000 Subject: [PATCH 20/86] Remove unused members --- .../codecs/jvector/JVectorRandomAccessReader.java | 3 --- .../lucene/sandbox/codecs/jvector/JVectorReader.java | 5 ----- .../lucene/sandbox/codecs/jvector/JVectorWriter.java | 3 --- .../sandbox/codecs/jvector/KNNJVectorTests.java | 11 ----------- 4 files changed, 22 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java index 6c1519a3b04e..c3017aca8ffa 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java @@ -30,9 +30,7 @@ public class JVectorRandomAccessReader implements RandomAccessReader { private final byte[] internalBuffer = new byte[Long.BYTES]; - private final byte[] internalFloatBuffer = new byte[Float.BYTES]; private final IndexInput 
indexInputDelegate; - private volatile boolean closed = false; public JVectorRandomAccessReader(IndexInput indexInputDelegate) { this.indexInputDelegate = indexInputDelegate; @@ -119,7 +117,6 @@ public void read(float[] floats, int offset, int count) throws IOException { @Override public void close() throws IOException { - this.closed = true; // no need to really close the index input delegate since it is a clone } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 9ac1234fb258..21e6fd918fe1 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -234,10 +234,7 @@ private void readFields(ChecksumIndexInput meta) throws IOException { } class FieldEntry implements Closeable { - private final FieldInfo fieldInfo; - private final VectorEncoding vectorEncoding; private final VectorSimilarityFunction similarityFunction; - private final int dimension; private final long vectorIndexOffset; private final long vectorIndexLength; private final long pqCodebooksAndVectorsLength; @@ -257,12 +254,10 @@ public FieldEntry( this.similarityFunction = VectorSimilarityMapper.ordToDistFunc( vectorIndexFieldMetadata.vectorSimilarityFunction.ordinal()); - this.vectorEncoding = vectorIndexFieldMetadata.vectorEncoding; this.vectorIndexOffset = vectorIndexFieldMetadata.vectorIndexOffset; this.vectorIndexLength = vectorIndexFieldMetadata.vectorIndexLength; this.pqCodebooksAndVectorsLength = vectorIndexFieldMetadata.pqCodebooksAndVectorsLength; this.pqCodebooksAndVectorsOffset = vectorIndexFieldMetadata.pqCodebooksAndVectorsOffset; - this.dimension = vectorIndexFieldMetadata.vectorDimension; this.graphNodeIdToDocMap = vectorIndexFieldMetadata.graphNodeIdToDocMap; this.vectorIndexFieldDataFileName = diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 957d5f2acfa5..55c1fa163d79 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -584,9 +584,6 @@ class RandomAccessMergedFloatVectorValues implements RandomAccessVectorValues { private static final int READER_ORD = 1; private static final int LEADING_READER_IDX = 0; - private final VectorTypeSupport VECTOR_TYPE_SUPPORT = - VectorizationProvider.getInstance().getVectorTypeSupport(); - // Array of sub-readers private final KnnVectorsReader[] readers; private final JVectorFloatVectorValues[] perReaderFloatVectorValues; diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java index f5093abd9770..de4ae5283371 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java @@ -959,8 +959,6 @@ public void testLuceneKnnIndex_mergeEnabled_withCompoundFile_cosine() throws IOE */ @Test public void testJVectorKnnIndex_simpleCase_withBinaryVector() throws IOException { - int k = 3; // The number of nearest neighbours to gather - int totalNumberOfDocs = 10; IndexWriterConfig indexWriterConfig = 
LuceneTestCase.newIndexWriterConfig(); // TODO: re-enable this after fixing the compound file augmentation for JVector indexWriterConfig.setUseCompoundFile(false); @@ -1487,15 +1485,6 @@ private JVectorKnnFloatVectorQuery getJVectorKnnFloatVectorQuery( JVectorReader.DEFAULT_QUERY_USE_PRUNING); } - private static float[][] getMonotonicallyIncreasingVectors(int numVectors, int vectorDimension) { - float[][] vectors = new float[numVectors][vectorDimension]; - for (int i = 0; i < numVectors; i++) { - vectors[i] = generateZerosVectorWithLastValue(vectorDimension, i); - } - - return vectors; - } - private static float[] generateZerosVectorWithLastValue(int vectorDimension, int lastValue) { float[] vector = new float[vectorDimension]; for (int i = 0; i < vectorDimension - 1; i++) { From 3c47063c097b96a5fd5d797a57b3ee87918635f3 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 17:48:42 +0000 Subject: [PATCH 21/86] Fix unqualified javadoc --- .../apache/lucene/sandbox/codecs/jvector/JVectorWriter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 55c1fa163d79..a2bedb0cca31 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -80,8 +80,8 @@ * jVector ordinals and the new Lucene document IDs. This is achieved by keeping checkpoints of the * {@link GraphNodeIdToDocMap} class in the index metadata and allowing us to update the mapping as * needed across merges by constructing a new mapping from the previous mapping and the {@link - * MergeState.DocMap} provided in the {@link MergeState}. And across sorts with {@link - * GraphNodeIdToDocMap#update(Sorter.DocMap)} during flushes. + * org.apache.lucene.index.MergeState.DocMap} provided in the {@link MergeState}. And across sorts + * with {@link GraphNodeIdToDocMap#update(Sorter.DocMap)} during flushes. 
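The javadoc above describes the central bookkeeping concern: graph node ordinals stay attached to their nodes, while the Lucene doc IDs they point to change on merges and index sorts. A rough sketch of the remapping it describes, assuming an array-based ordinal-to-docId table purely for illustration (GraphNodeIdToDocMap's real representation is not shown in this hunk):

// Sketch: carry an ordinal -> docId table across a merge via the merge-time doc-id mapping.
static int[] remapThroughMerge(int[] oldOrdToDoc, java.util.function.IntUnaryOperator mergeDocMap) {
  int[] newOrdToDoc = new int[oldOrdToDoc.length];
  for (int ord = 0; ord < oldOrdToDoc.length; ord++) {
    // the node keeps its ordinal; only the Lucene doc id it maps to moves
    newOrdToDoc[ord] = mergeDocMap.applyAsInt(oldOrdToDoc[ord]);
  }
  return newOrdToDoc;
}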
*/ public class JVectorWriter extends KnnVectorsWriter { private static final long SHALLOW_RAM_BYTES_USED = From af5e0bedf3bb14fa264f626756b33d195cc3a8a1 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 17:49:43 +0000 Subject: [PATCH 22/86] Suppress cases-omitted from switch expression --- .../org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java | 1 + 1 file changed, 1 insertion(+) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index a2bedb0cca31..8b0eacf0656b 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -557,6 +557,7 @@ static io.github.jbellis.jvector.vector.VectorSimilarityFunction getVectorSimila case EUCLIDEAN -> io.github.jbellis.jvector.vector.VectorSimilarityFunction.EUCLIDEAN; case COSINE -> io.github.jbellis.jvector.vector.VectorSimilarityFunction.COSINE; case DOT_PRODUCT -> io.github.jbellis.jvector.vector.VectorSimilarityFunction.DOT_PRODUCT; + // $CASES-OMITTED$ default -> throw new IllegalArgumentException( "Unsupported similarity function: " + fieldInfo.getVectorSimilarityFunction()); From 4b2beb89d4dcbcdf9afd13357174b70929e1692f Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 18:56:55 +0000 Subject: [PATCH 23/86] Add basic javadocs for classes without --- .../sandbox/codecs/jvector/JVectorFloatVectorValues.java | 4 ++-- .../apache/lucene/sandbox/codecs/jvector/JVectorFormat.java | 1 + .../sandbox/codecs/jvector/JVectorRandomAccessReader.java | 1 + .../apache/lucene/sandbox/codecs/jvector/JVectorReader.java | 1 + .../lucene/sandbox/codecs/jvector/JVectorVectorScorer.java | 1 + .../apache/lucene/sandbox/codecs/jvector/JVectorWriter.java | 1 + 6 files changed, 7 insertions(+), 2 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java index bc34d9141463..df9b71a385b8 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java @@ -27,6 +27,7 @@ import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.search.VectorScorer; +/// Implements Lucene vector access over a JVector on-disk index public class JVectorFloatVectorValues extends FloatVectorValues { private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport(); @@ -88,8 +89,7 @@ public int docID() { @Override public int nextDoc() throws IOException { // Advance to the next node docId starts from -1 which is why we need to increment docId by - // 1 "size" - // times + // 1 "size" times while (docId < size - 1) { docId++; if (liveNodes.get(docId)) { diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java index 1d208d31415f..75aa58ba0181 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java @@ -27,6 +27,7 @@ import org.apache.lucene.index.SegmentReadState; import 
org.apache.lucene.index.SegmentWriteState; +/// Implements K-NN search using JVector library for indexing public class JVectorFormat extends KnnVectorsFormat { public static final String NAME = "JVectorFormat"; public static final String META_CODEC_NAME = "JVectorVectorsFormatMeta"; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java index c3017aca8ffa..de87f451f5c8 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java @@ -28,6 +28,7 @@ import org.apache.lucene.store.IndexInput; import org.apache.lucene.util.IOUtils; +/// Implements JVector reader capabilities over a Lucene IndexInput public class JVectorRandomAccessReader implements RandomAccessReader { private final byte[] internalBuffer = new byte[Long.BYTES]; private final IndexInput indexInputDelegate; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 21e6fd918fe1..753d321a6429 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -46,6 +46,7 @@ import org.apache.lucene.store.*; import org.apache.lucene.util.IOUtils; +/// Implements KnnVectorsReader over an on-disk JVector index serialized using {@link JVectorWriter} public class JVectorReader extends KnnVectorsReader { public static final float DEFAULT_QUERY_SIMILARITY_THRESHOLD = 0f; public static final float DEFAULT_QUERY_RERANK_FLOOR = 0f; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java index cc6f3e6d6bff..8c9006dd0901 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java @@ -24,6 +24,7 @@ import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.VectorScorer; +/// Implements Lucene scoring over a JVector index public class JVectorVectorScorer implements VectorScorer { private final JVectorFloatVectorValues floatVectorValues; private final KnnVectorValues.DocIndexIterator docIndexIterator; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 8b0eacf0656b..0bf0ea618370 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -393,6 +393,7 @@ private PQVectors getPQVectors( return pqVectors; } + /// Metadata about the index to be persisted on disk public static class VectorIndexFieldMetadata { final int fieldNumber; final VectorEncoding vectorEncoding; From f97bfc51394d7bd1cdd36930d7982829942630cc Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 19:03:33 +0000 Subject: [PATCH 24/86] Fix forbiddenApis error --- .../apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java | 4 +++- 1 file changed, 3 insertions(+), 1 
deletion(-) diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java index de4ae5283371..1931d0a4e2d0 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java @@ -36,6 +36,7 @@ import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TestUtil; +import org.apache.lucene.util.NamedThreadFactory; import org.junit.Assert; import org.junit.Test; @@ -673,7 +674,8 @@ public void testLuceneKnnIndex_multipleMerges_with_ordering_check() // not exhausting the file handles int numThreads = 10; // Number of concurrent search threads int queriesPerThread = 100; // Number of searches per thread - ExecutorService executor = Executors.newFixedThreadPool(numThreads); + ExecutorService executor = + Executors.newFixedThreadPool(numThreads, new NamedThreadFactory("KNNJVectorTests")); CountDownLatch latch = new CountDownLatch(numThreads); AtomicBoolean failureDetected = new AtomicBoolean(false); AtomicInteger totalQueries = new AtomicInteger(0); From 17e211f5d5b6b498c0ea4a7762a9068a5bc349b2 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 19:05:51 +0000 Subject: [PATCH 25/86] Rename KNNJVectorTests --- .../jvector/{KNNJVectorTests.java => TestJVectorFormat.java} | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) rename lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/{KNNJVectorTests.java => TestJVectorFormat.java} (99%) diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/TestJVectorFormat.java similarity index 99% rename from lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java rename to lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/TestJVectorFormat.java index 1931d0a4e2d0..0524a24af7a0 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/KNNJVectorTests.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/TestJVectorFormat.java @@ -48,8 +48,8 @@ // due to leaked thread pool warning. 
@ThreadLeakFilters( defaultFilters = true, - filters = {KNNJVectorTests.ThreadLeakFilter.class}) -public class KNNJVectorTests extends LuceneTestCase { + filters = {TestJVectorFormat.ThreadLeakFilter.class}) +public class TestJVectorFormat extends LuceneTestCase { private static final String TEST_FIELD = "test_field"; private static final String TEST_ID_FIELD = "id"; From c2c8082fd158f593c8043288f85c8e77aa128fa9 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 19:37:04 +0000 Subject: [PATCH 26/86] Fix missing @Test annotations --- .../apache/lucene/sandbox/codecs/jvector/TestJVectorFormat.java | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/TestJVectorFormat.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/TestJVectorFormat.java index 0524a24af7a0..18ac79e1da3f 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/TestJVectorFormat.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/TestJVectorFormat.java @@ -110,6 +110,7 @@ public void testJVectorKnnIndex_simpleCase() throws IOException { } /** Test the scenario when not all documents are populated with the vector field */ + @Test public void testMissing_fields() throws IOException { final int k = 3; // The number of nearest neighbors to gather final int totalNumberOfDocs = 10; @@ -170,6 +171,7 @@ public void testMissing_fields() throws IOException { * * @throws IOException if an I/O error occurs */ + @Test public void test_sorted_index() throws IOException { final int k = 3; // The number of nearest neighbors to gather final int totalNumberOfDocs = 10; From d26e1f67a37720922eaf4e0f804226f84e2019e6 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 29 Oct 2025 20:11:26 +0000 Subject: [PATCH 27/86] Use JVectorSearchStrategy to plumb search parameters to JVectorReader --- .../codecs/jvector/JVectorKnnCollector.java | 91 ------------- .../jvector/JVectorKnnFloatVectorQuery.java | 94 -------------- .../sandbox/codecs/jvector/JVectorReader.java | 38 ++---- .../codecs/jvector/JVectorSearchStrategy.java | 121 ++++++++++++++++++ .../codecs/jvector/TestJVectorFormat.java | 97 ++++++-------- 5 files changed, 177 insertions(+), 264 deletions(-) delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java create mode 100644 lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorSearchStrategy.java diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java deleted file mode 100644 index d2fad6532570..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnCollector.java +++ /dev/null @@ -1,91 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.lucene.sandbox.codecs.jvector; - -import org.apache.lucene.search.KnnCollector; -import org.apache.lucene.search.TopDocs; -import org.apache.lucene.search.knn.KnnSearchStrategy; - -/** - * Wrapper class for KnnCollector that provides passing of additional parameters specific for - * JVector. - */ -public class JVectorKnnCollector implements KnnCollector { - final KnnCollector delegate; - final float threshold; - final float rerankFloor; - final int overQueryFactor; - final boolean usePruning; - - public JVectorKnnCollector( - KnnCollector delegate, - float threshold, - float rerankFloor, - int overQueryFactor, - boolean usePruning) { - this.delegate = delegate; - this.threshold = threshold; - this.rerankFloor = rerankFloor; - this.overQueryFactor = overQueryFactor; - this.usePruning = usePruning; - } - - @Override - public boolean earlyTerminated() { - return delegate.earlyTerminated(); - } - - @Override - public void incVisitedCount(int count) { - delegate.incVisitedCount(count); - } - - @Override - public long visitedCount() { - return delegate.visitedCount(); - } - - @Override - public long visitLimit() { - return delegate.visitLimit(); - } - - @Override - public int k() { - return delegate.k(); - } - - @Override - public boolean collect(int docId, float similarity) { - return delegate.collect(docId, similarity); - } - - @Override - public float minCompetitiveSimilarity() { - return delegate.minCompetitiveSimilarity(); - } - - @Override - public TopDocs topDocs() { - return delegate.topDocs(); - } - - @Override - public KnnSearchStrategy getSearchStrategy() { - return delegate.getSearchStrategy(); - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java deleted file mode 100644 index 50246250ad60..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorKnnFloatVectorQuery.java +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.lucene.sandbox.codecs.jvector; - -import java.io.IOException; -import org.apache.lucene.index.FloatVectorValues; -import org.apache.lucene.index.LeafReader; -import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.search.*; -import org.apache.lucene.search.knn.KnnCollectorManager; -import org.apache.lucene.search.knn.KnnSearchStrategy; - -/** - * {@link KnnFloatVectorQuery} that uses jVector to perform the search. We use this wrapper simply - * because we can't pass jVector specific parameters with the upstream {@link KnnFloatVectorQuery}. - */ -public class JVectorKnnFloatVectorQuery extends KnnFloatVectorQuery { - private static final TopDocs NO_RESULTS = TopDocsCollector.EMPTY_TOPDOCS; - private final int overQueryFactor; - private final float threshold; - private final float rerankFloor; - private final boolean usePruning; - - public JVectorKnnFloatVectorQuery( - String field, - float[] target, - int k, - int overQueryFactor, - float threshold, - float rerankFloor, - boolean usePruning) { - super(field, target, k); - this.overQueryFactor = overQueryFactor; - this.threshold = threshold; - this.rerankFloor = rerankFloor; - this.usePruning = usePruning; - } - - public JVectorKnnFloatVectorQuery( - String field, - float[] target, - int k, - Query filter, - int overQueryFactor, - float threshold, - float rerankFloor, - boolean usePruning) { - super(field, target, k, filter); - this.overQueryFactor = overQueryFactor; - this.threshold = threshold; - this.rerankFloor = rerankFloor; - this.usePruning = usePruning; - } - - @Override - protected TopDocs approximateSearch( - LeafReaderContext context, - AcceptDocs acceptDocs, - int visitedLimit, - KnnCollectorManager knnCollectorManager) - throws IOException { - final KnnCollector delegateCollector = - knnCollectorManager.newCollector(visitedLimit, KnnSearchStrategy.Hnsw.DEFAULT, context); - final KnnCollector knnCollector = - new JVectorKnnCollector( - delegateCollector, threshold, rerankFloor, overQueryFactor, usePruning); - LeafReader reader = context.reader(); - FloatVectorValues floatVectorValues = reader.getFloatVectorValues(field); - if (floatVectorValues == null) { - FloatVectorValues.checkField(reader, field); - return NO_RESULTS; - } - if (Math.min(knnCollector.k(), floatVectorValues.size()) == 0) { - return NO_RESULTS; - } - reader.searchNearestVectors(field, getTargetCopy(), knnCollector, acceptDocs); - TopDocs results = knnCollector.topDocs(); - return results != null ? 
results : NO_RESULTS; - } -} diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 753d321a6429..a2abcda40c3d 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -43,16 +43,12 @@ import org.apache.lucene.index.*; import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.KnnCollector; +import org.apache.lucene.search.knn.KnnSearchStrategy; import org.apache.lucene.store.*; import org.apache.lucene.util.IOUtils; /// Implements KnnVectorsReader over an on-disk JVector index serialized using {@link JVectorWriter} public class JVectorReader extends KnnVectorsReader { - public static final float DEFAULT_QUERY_SIMILARITY_THRESHOLD = 0f; - public static final float DEFAULT_QUERY_RERANK_FLOOR = 0f; - public static final int DEFAULT_OVER_QUERY_FACTOR = 3; - public static final boolean DEFAULT_QUERY_USE_PRUNING = false; - private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport(); @@ -145,20 +141,14 @@ public OnDiskGraphIndex getOnDiskGraphIndex(String field) throws IOException { public void search(String field, float[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { final OnDiskGraphIndex index = fieldEntryMap.get(field).index; - final JVectorKnnCollector jvectorKnnCollector; - if (knnCollector instanceof JVectorKnnCollector) { - jvectorKnnCollector = (JVectorKnnCollector) knnCollector; - } else { - // KnnCollector must be of type JVectorKnnCollector, for now we will re-wrap it but this is - // not ideal - jvectorKnnCollector = - new JVectorKnnCollector( - knnCollector, - DEFAULT_QUERY_SIMILARITY_THRESHOLD, - DEFAULT_QUERY_RERANK_FLOOR, - DEFAULT_OVER_QUERY_FACTOR, - DEFAULT_QUERY_USE_PRUNING); - } + + final JVectorSearchStrategy searchStrategy; + if (knnCollector.getSearchStrategy() instanceof JVectorSearchStrategy strategy) { + searchStrategy = strategy; + } else if (knnCollector.getSearchStrategy() instanceof KnnSearchStrategy.Seeded seeded + && seeded.originalStrategy() instanceof JVectorSearchStrategy strategy) { + searchStrategy = strategy; + } else searchStrategy = JVectorSearchStrategy.DEFAULT; // search for a random vector using a GraphSearcher and SearchScoreProvider VectorFloat q = VECTOR_TYPE_SUPPORT.createFloatVector(target); @@ -197,13 +187,13 @@ public void search(String field, float[] target, KnnCollector knnCollector, Acce final var searchResults = graphSearcher.search( ssp, - jvectorKnnCollector.k(), - jvectorKnnCollector.k() * jvectorKnnCollector.overQueryFactor, - jvectorKnnCollector.threshold, - jvectorKnnCollector.rerankFloor, + knnCollector.k(), + knnCollector.k() * searchStrategy.overQueryFactor, + searchStrategy.threshold, + searchStrategy.rerankFloor, compatibleBits); for (SearchResult.NodeScore ns : searchResults.getNodes()) { - jvectorKnnCollector.collect(jvectorLuceneDocMap.getLuceneDocId(ns.node), ns.score); + knnCollector.collect(jvectorLuceneDocMap.getLuceneDocId(ns.node), ns.score); } } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorSearchStrategy.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorSearchStrategy.java new file mode 100644 index 000000000000..1f713a8b214b --- /dev/null +++ 
b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorSearchStrategy.java @@ -0,0 +1,121 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.lucene.sandbox.codecs.jvector; + +import java.util.Locale; +import java.util.Objects; +import org.apache.lucene.search.knn.KnnSearchStrategy; + +/// Defines query-time parameters for searching a JVector index to be passed into +/// [`search()`][JVectorReader#search] via [`KnnCollector`][org.apache.lucene.search.KnnCollector]. +public class JVectorSearchStrategy extends KnnSearchStrategy { + static final float DEFAULT_QUERY_SIMILARITY_THRESHOLD = 0f; + static final float DEFAULT_QUERY_RERANK_FLOOR = 0f; + static final int DEFAULT_OVER_QUERY_FACTOR = 3; + static final boolean DEFAULT_QUERY_USE_PRUNING = false; + + public static final JVectorSearchStrategy DEFAULT = + new JVectorSearchStrategy( + DEFAULT_QUERY_SIMILARITY_THRESHOLD, + DEFAULT_QUERY_RERANK_FLOOR, + DEFAULT_OVER_QUERY_FACTOR, + DEFAULT_QUERY_USE_PRUNING); + + final float threshold; + final float rerankFloor; + final int overQueryFactor; + final boolean usePruning; + + private JVectorSearchStrategy( + float threshold, float rerankFloor, int overQueryFactor, boolean usePruning) { + this.threshold = threshold; + this.rerankFloor = rerankFloor; + this.overQueryFactor = overQueryFactor; + this.usePruning = usePruning; + } + + @Override + public String toString() { + return String.format( + Locale.ROOT, + "%s[threshold=%f, rerankFloor=%f, overQueryFactor=%d, usePruning=%s]", + getClass().getSimpleName(), + threshold, + rerankFloor, + overQueryFactor, + usePruning); + } + + @Override + public boolean equals(Object obj) { + if (obj == this) { + return true; + } else if (obj instanceof JVectorSearchStrategy other) { + return this.threshold == other.threshold + && this.rerankFloor == other.rerankFloor + && this.overQueryFactor == other.overQueryFactor + && this.usePruning == other.usePruning; + } else return false; + } + + @Override + public int hashCode() { + return Objects.hash(getClass(), threshold, rerankFloor, overQueryFactor, usePruning); + } + + @Override + public void nextVectorsBlock() {} + + public static Builder builder() { + return new Builder(); + } + + /// Builder for defining a [JVectorSearchStrategy]. 
+ public static class Builder { + private float threshold = DEFAULT_QUERY_SIMILARITY_THRESHOLD; + private float rerankFloor = DEFAULT_QUERY_RERANK_FLOOR; + private int overQueryFactor = DEFAULT_OVER_QUERY_FACTOR; + private boolean usePruning = DEFAULT_QUERY_USE_PRUNING; + + private Builder() {} + + public Builder withThreshold(float threshold) { + this.threshold = threshold; + return this; + } + + public Builder withRerankFloor(float rerankFloor) { + this.rerankFloor = rerankFloor; + return this; + } + + public Builder withOverQueryFactor(int overQueryFactor) { + this.overQueryFactor = overQueryFactor; + return this; + } + + public Builder withUsePruning(boolean usePruning) { + this.usePruning = usePruning; + return this; + } + + public JVectorSearchStrategy build() { + return new JVectorSearchStrategy(threshold, rerankFloor, overQueryFactor, usePruning); + } + } +} diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/TestJVectorFormat.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/TestJVectorFormat.java index 18ac79e1da3f..0c46a50a8b61 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/TestJVectorFormat.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/TestJVectorFormat.java @@ -86,8 +86,8 @@ public void testJVectorKnnIndex_simpleCase() throws IOException { final Query filterQuery = new MatchAllDocsQuery(); final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = - getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + final KnnFloatVectorQuery knnFloatVectorQuery = + new KnnFloatVectorQuery("test_field", target, k, filterQuery); TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); assertEquals(k, topDocs.totalHits.value()); assertEquals(9, topDocs.scoreDocs[0].doc); @@ -142,8 +142,8 @@ public void testMissing_fields() throws IOException { final Query filterQuery = new MatchAllDocsQuery(); final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = - getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + final KnnFloatVectorQuery knnFloatVectorQuery = + new KnnFloatVectorQuery("test_field", target, k, filterQuery); TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); assertEquals(k, topDocs.totalHits.value()); assertEquals(0, topDocs.scoreDocs[0].doc); @@ -208,8 +208,8 @@ public void test_sorted_index() throws IOException { final Query filterQuery = new MatchAllDocsQuery(); final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = - getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + final KnnFloatVectorQuery knnFloatVectorQuery = + new KnnFloatVectorQuery("test_field", target, k, filterQuery); TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); assertEquals(k, topDocs.totalHits.value()); assertEquals(9, topDocs.scoreDocs[0].doc); @@ -286,7 +286,7 @@ public void testJVectorKnnIndex_multipleSegments() throws IOException { Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); final Query filterQuery = new MatchAllDocsQuery(); final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = + final KnnFloatVectorQuery knnFloatVectorQuery = new KnnFloatVectorQuery("test_field", target, k, filterQuery); TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); assertEquals(k, topDocs.totalHits.value()); @@ -343,8 +343,8 @@ public void testJVectorKnnIndex_mergeEnabled() throws 
IOException { Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); final Query filterQuery = new MatchAllDocsQuery(); final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = - getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + final KnnFloatVectorQuery knnFloatVectorQuery = + new KnnFloatVectorQuery("test_field", target, k, filterQuery); TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); assertEquals(k, topDocs.totalHits.value()); Document doc = reader.storedFields().document(topDocs.scoreDocs[0].doc); @@ -405,8 +405,8 @@ public void multipleMerges() throws IOException { Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); final Query filterQuery = new MatchAllDocsQuery(); final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = - getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + final KnnFloatVectorQuery knnFloatVectorQuery = + new KnnFloatVectorQuery("test_field", target, k, filterQuery); TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); assertEquals(k, topDocs.totalHits.value()); Document doc = reader.storedFields().document(topDocs.scoreDocs[0].doc); @@ -479,8 +479,8 @@ public void testJVectorKnnIndex_multiple_merges_large_batches_no_quantization() Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); final Query filterQuery = new MatchAllDocsQuery(); final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = - getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + final KnnFloatVectorQuery knnFloatVectorQuery = + new KnnFloatVectorQuery("test_field", target, k, filterQuery); TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); assertEquals(k, topDocs.totalHits.value()); @@ -548,8 +548,8 @@ public void testJVectorKnnIndex_multiple_merges_large_batches_no_quantization() final Query filterQuery = new MatchAllDocsQuery(); final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = - getJVectorKnnFloatVectorQuery(TEST_FIELD, target, k, filterQuery); + final KnnFloatVectorQuery knnFloatVectorQuery = + new KnnFloatVectorQuery(TEST_FIELD, target, k, filterQuery); TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); assertEquals(k, topDocs.totalHits.value()); final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); @@ -664,8 +664,7 @@ public void testLuceneKnnIndex_multipleMerges_with_ordering_check() for (int i = 0; i < reader.maxDoc(); i++) { float[] query = generateRandomVectors(1, 2)[0]; TopDocs td = - searcher.search( - getJVectorKnnFloatVectorQuery("vec", query, k, new MatchAllDocsQuery()), k); + searcher.search(new KnnFloatVectorQuery("vec", query, k, new MatchAllDocsQuery()), k); assertEquals(k, td.scoreDocs.length); compareSearchResults( @@ -827,7 +826,7 @@ public void deletedDocs() throws IOException { final float[] target = {0.0f, 1.0f * (i + docToDeleteInEachBatch)}; final IndexSearcher searcher = newSearcher(reader); final KnnFloatVectorQuery knnFloatVectorQuery = - getJVectorKnnFloatVectorQuery("test_field", target, k, new MatchAllDocsQuery()); + new KnnFloatVectorQuery("test_field", target, k, new MatchAllDocsQuery()); TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); assertEquals(k, topDocs.totalHits.value()); for (int j = 0; j < k; j++) { @@ -877,8 +876,8 @@ public void testLuceneKnnIndex_mergeEnabled_withCompoundFile() throws IOExceptio Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); 
final Query filterQuery = new MatchAllDocsQuery(); final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = - getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + final KnnFloatVectorQuery knnFloatVectorQuery = + new KnnFloatVectorQuery("test_field", target, k, filterQuery); TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); assertEquals(k, topDocs.totalHits.value()); assertEquals(9, topDocs.scoreDocs[0].doc); @@ -934,8 +933,8 @@ public void testLuceneKnnIndex_mergeEnabled_withCompoundFile_cosine() throws IOE Assert.assertEquals(totalNumberOfDocs, reader.numDocs()); final Query filterQuery = new MatchAllDocsQuery(); final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = - getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + final KnnFloatVectorQuery knnFloatVectorQuery = + new KnnFloatVectorQuery("test_field", target, k, filterQuery); TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); assertEquals(k, topDocs.totalHits.value()); assertEquals(0, topDocs.scoreDocs[0].doc); @@ -1007,8 +1006,8 @@ public void testJVectorKnnIndex_withFilter() throws IOException { try (IndexReader reader = DirectoryReader.open(w)) { final Query filterQuery = new TermQuery(new Term("filter_field", "even")); final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = - getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + final KnnFloatVectorQuery knnFloatVectorQuery = + new KnnFloatVectorQuery("test_field", target, k, filterQuery); TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); assertEquals(k, topDocs.totalHits.value()); @@ -1077,8 +1076,8 @@ public void testJVectorKnnIndex_simpleCase_withQuantization() throws IOException final Query filterQuery = new MatchAllDocsQuery(); final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = - getJVectorKnnFloatVectorQuery(TEST_FIELD, target, k, filterQuery); + final KnnFloatVectorQuery knnFloatVectorQuery = + new KnnFloatVectorQuery(TEST_FIELD, target, k, filterQuery); TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); assertEquals(k, topDocs.totalHits.value()); final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); @@ -1134,16 +1133,19 @@ public void testJVectorKnnIndex_simpleCase_withQuantization_rerank() throws IOEx }); // Query with essentially no reranking and expect recall to be very low + JVectorSearchStrategy searchStrategy = + JVectorSearchStrategy.builder().withOverQueryFactor(1).build(); KnnFloatVectorQuery knnFloatVectorQuery = - getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery, 1); + new KnnFloatVectorQuery("test_field", target, k, filterQuery, searchStrategy); TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); assertEquals(k, topDocs.totalHits.value()); final float recallWithLowOverqueryFactor = calculateRecall(topDocs, expectedMinScoreInTopK); // Query with reranking and expect recall to be high + searchStrategy = JVectorSearchStrategy.builder().withOverQueryFactor(5).build(); knnFloatVectorQuery = - getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery, 5); + new KnnFloatVectorQuery("test_field", target, k, filterQuery, searchStrategy); topDocs = searcher.search(knnFloatVectorQuery, k); assertEquals(k, topDocs.totalHits.value()); float recallWithHighOverqueryFactor = calculateRecall(topDocs, expectedMinScoreInTopK); @@ -1211,8 +1213,8 @@ public void 
testJVectorKnnIndex_happyCase_withQuantization_multipleSegments() th final Query filterQuery = new MatchAllDocsQuery(); final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = - getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + final KnnFloatVectorQuery knnFloatVectorQuery = + new KnnFloatVectorQuery("test_field", target, k, filterQuery); TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); assertEquals(k, topDocs.totalHits.value()); final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); @@ -1284,8 +1286,8 @@ public void testJVectorKnnIndex_mixedBatchSizes_withQuantization_multipleMerges( final Query filterQuery = new MatchAllDocsQuery(); final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = - getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery); + final KnnFloatVectorQuery knnFloatVectorQuery = + new KnnFloatVectorQuery("test_field", target, k, filterQuery); TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); assertEquals(k, topDocs.totalHits.value()); final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); @@ -1362,8 +1364,10 @@ public void testJVectorKnnIndex_mixedBatchSizes_withQuantization_multipleMerges_ final Query filterQuery = new MatchAllDocsQuery(); final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = - getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery, 1000); + final JVectorSearchStrategy searchStrategy = + JVectorSearchStrategy.builder().withOverQueryFactor(1000).build(); + final KnnFloatVectorQuery knnFloatVectorQuery = + new KnnFloatVectorQuery("test_field", target, k, filterQuery, searchStrategy); TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); assertEquals(k, topDocs.totalHits.value()); final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); @@ -1439,8 +1443,10 @@ public void testJVectorKnnIndex_withQuantization_withCompoundFile_with_refinemen final Query filterQuery = new MatchAllDocsQuery(); final IndexSearcher searcher = newSearcher(reader); - KnnFloatVectorQuery knnFloatVectorQuery = - getJVectorKnnFloatVectorQuery("test_field", target, k, filterQuery, 1000); + final JVectorSearchStrategy searchStrategy = + JVectorSearchStrategy.builder().withOverQueryFactor(1000).build(); + final KnnFloatVectorQuery knnFloatVectorQuery = + new KnnFloatVectorQuery("test_field", target, k, filterQuery, searchStrategy); TopDocs topDocs = searcher.search(knnFloatVectorQuery, k); assertEquals(k, topDocs.totalHits.value()); final float recall = calculateRecall(reader, groundTruthVectorsIds, topDocs, k); @@ -1470,25 +1476,6 @@ private float calculateRecall(TopDocs topDocs, float minScoreInTopK) { return recall; } - private JVectorKnnFloatVectorQuery getJVectorKnnFloatVectorQuery( - String fieldName, float[] target, int k, Query filterQuery) { - return getJVectorKnnFloatVectorQuery( - fieldName, target, k, filterQuery, JVectorReader.DEFAULT_OVER_QUERY_FACTOR); - } - - private JVectorKnnFloatVectorQuery getJVectorKnnFloatVectorQuery( - String fieldName, float[] target, int k, Query filterQuery, int overQueryFactor) { - return new JVectorKnnFloatVectorQuery( - fieldName, - target, - k, - filterQuery, - overQueryFactor, - JVectorReader.DEFAULT_QUERY_SIMILARITY_THRESHOLD, - JVectorReader.DEFAULT_QUERY_RERANK_FLOOR, - JVectorReader.DEFAULT_QUERY_USE_PRUNING); - } - private static float[] generateZerosVectorWithLastValue(int 
vectorDimension, int lastValue) { float[] vector = new float[vectorDimension]; for (int i = 0; i < vectorDimension - 1; i++) { From 7435bf9e5cda7c038ef9809cbe4016b4f23e5211 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Thu, 30 Oct 2025 14:58:02 +0000 Subject: [PATCH 28/86] Use IntUnaryOperator for numberOfSubspacesPerVetorSupplier --- .../lucene/sandbox/codecs/jvector/JVectorFormat.java | 10 +++++----- .../lucene/sandbox/codecs/jvector/JVectorWriter.java | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java index 75aa58ba0181..ef61fda380a6 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java @@ -20,7 +20,7 @@ import java.io.IOException; import java.util.concurrent.ForkJoinPool; import java.util.concurrent.ForkJoinWorkerThread; -import java.util.function.Function; +import java.util.function.IntUnaryOperator; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.KnnVectorsWriter; @@ -55,8 +55,8 @@ public class JVectorFormat extends KnnVectorsFormat { private final int maxConn; private final int beamWidth; - private final Function - numberOfSubspacesPerVectorSupplier; // as a function of the original dimension + // As a function of the original dimension + private final IntUnaryOperator numberOfSubspacesPerVectorSupplier; private final int minBatchSizeForQuantization; private final float alpha; private final float neighborOverflow; @@ -91,7 +91,7 @@ public JVectorFormat( int beamWidth, float neighborOverflow, float alpha, - Function numberOfSubspacesPerVectorSupplier, + IntUnaryOperator numberOfSubspacesPerVectorSupplier, int minBatchSizeForQuantization, boolean hierarchyEnabled) { this( @@ -111,7 +111,7 @@ public JVectorFormat( int beamWidth, float neighborOverflow, float alpha, - Function numberOfSubspacesPerVectorSupplier, + IntUnaryOperator numberOfSubspacesPerVectorSupplier, int minBatchSizeForQuantization, boolean hierarchyEnabled) { super(name); diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 0bf0ea618370..f7351c1832cf 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -38,7 +38,7 @@ import java.io.UnsupportedEncodingException; import java.util.*; import java.util.concurrent.ForkJoinPool; -import java.util.function.Function; +import java.util.function.IntUnaryOperator; import java.util.stream.IntStream; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnFieldVectorsWriter; @@ -98,9 +98,8 @@ public class JVectorWriter extends KnnVectorsWriter { private final int beamWidth; private final float degreeOverflow; private final float alpha; - private final Function - numberOfSubspacesPerVectorSupplier; // Number of subspaces used per vector for PQ quantization - // as a function of the original dimension + /// Number of subspaces used per vector in PQ quantization as a function of the original dimension + private final IntUnaryOperator numberOfSubspacesPerVectorSupplier; private final 
int minimumBatchSizeForQuantization; // Threshold for the vector count above which we will trigger // PQ quantization @@ -114,7 +113,7 @@ public JVectorWriter( int beamWidth, float degreeOverflow, float alpha, - Function numberOfSubspacesPerVectorSupplier, + IntUnaryOperator numberOfSubspacesPerVectorSupplier, int minimumBatchSizeForQuantization, boolean hierarchyEnabled) throws IOException { @@ -372,7 +371,8 @@ private PQVectors getPQVectors( throws IOException { final VectorSimilarityFunction vectorSimilarityFunction = fieldInfo.getVectorSimilarityFunction(); - final var M = numberOfSubspacesPerVectorSupplier.apply(randomAccessVectorValues.dimension()); + final int M = + numberOfSubspacesPerVectorSupplier.applyAsInt(randomAccessVectorValues.dimension()); final var numberOfClustersPerSubspace = Math.min(256, randomAccessVectorValues.size()); // number of centroids per // subspace From ddbae421f8b382e6ee2b352e356c504851d543ed Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Thu, 30 Oct 2025 17:00:13 +0000 Subject: [PATCH 29/86] Fix missed call to KnnCollector.incVisitedCount --- .../apache/lucene/sandbox/codecs/jvector/JVectorReader.java | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index a2abcda40c3d..1a766e6d2924 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -195,6 +195,10 @@ public void search(String field, float[] target, KnnCollector knnCollector, Acce for (SearchResult.NodeScore ns : searchResults.getNodes()) { knnCollector.collect(jvectorLuceneDocMap.getLuceneDocId(ns.node), ns.score); } + // JVector does not seem to count the entry-point as visited + if (index.size(index.getMaxLevel()) > 0) { + knnCollector.incVisitedCount(1 + searchResults.getVisitedCount()); + } } } } From e5b7619a95492ed13e8ef7e053956e6f0a192e0e Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Thu, 30 Oct 2025 19:01:36 +0000 Subject: [PATCH 30/86] Skip search altogether when graph is empty --- .../lucene/sandbox/codecs/jvector/JVectorReader.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 1a766e6d2924..9f0354c1ca27 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -155,6 +155,10 @@ public void search(String field, float[] target, KnnCollector knnCollector, Acce final SearchScoreProvider ssp; try (var view = index.getView()) { + if (view.entryNode() == null) { + // Skip search when the graph is empty + return; + } if (fieldEntryMap.get(field).pqVectors != null) { // Quantized, use the precomputed score function final PQVectors pqVectors = fieldEntryMap.get(field).pqVectors; @@ -196,9 +200,7 @@ public void search(String field, float[] target, KnnCollector knnCollector, Acce knnCollector.collect(jvectorLuceneDocMap.getLuceneDocId(ns.node), ns.score); } // JVector does not seem to count the entry-point as visited - if (index.size(index.getMaxLevel()) > 0) { - knnCollector.incVisitedCount(1 + searchResults.getVisitedCount()); - } + 
knnCollector.incVisitedCount(1 + searchResults.getVisitedCount()); } } } From 41856a6c86f2dccb53d5a87b256cf60a5add033a Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Thu, 30 Oct 2025 20:05:01 +0000 Subject: [PATCH 31/86] Fix multiple fieldEntry lookups --- .../sandbox/codecs/jvector/JVectorReader.java | 20 ++++++++----------- 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 9f0354c1ca27..45dad566f626 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -23,7 +23,6 @@ import io.github.jbellis.jvector.graph.SearchResult; import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex; import io.github.jbellis.jvector.graph.similarity.DefaultSearchScoreProvider; -import io.github.jbellis.jvector.graph.similarity.ScoreFunction; import io.github.jbellis.jvector.graph.similarity.SearchScoreProvider; import io.github.jbellis.jvector.quantization.PQVectors; import io.github.jbellis.jvector.quantization.ProductQuantization; @@ -140,7 +139,8 @@ public OnDiskGraphIndex getOnDiskGraphIndex(String field) throws IOException { @Override public void search(String field, float[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { - final OnDiskGraphIndex index = fieldEntryMap.get(field).index; + final var fieldEntry = fieldEntryMap.get(field); + final OnDiskGraphIndex index = fieldEntry.index; final JVectorSearchStrategy searchStrategy; if (knnCollector.getSearchStrategy() instanceof JVectorSearchStrategy strategy) { @@ -159,21 +159,17 @@ public void search(String field, float[] target, KnnCollector knnCollector, Acce // Skip search when the graph is empty return; } - if (fieldEntryMap.get(field).pqVectors - != null) { // Quantized, use the precomputed score function - final PQVectors pqVectors = fieldEntryMap.get(field).pqVectors; + if (fieldEntry.pqVectors != null) { // Quantized, use the precomputed score function + final PQVectors pqVectors = fieldEntry.pqVectors; // SearchScoreProvider that does a first pass with the loaded-in-memory PQVectors, // then reranks with the exact vectors that are stored on disk in the index - ScoreFunction.ApproximateScoreFunction asf = - pqVectors.precomputedScoreFunctionFor(q, fieldEntryMap.get(field).similarityFunction); - ScoreFunction.ExactScoreFunction reranker = - view.rerankerFor(q, fieldEntryMap.get(field).similarityFunction); + final var asf = pqVectors.precomputedScoreFunctionFor(q, fieldEntry.similarityFunction); + final var reranker = view.rerankerFor(q, fieldEntry.similarityFunction); ssp = new DefaultSearchScoreProvider(asf, reranker); } else { // Not quantized, used typical searcher - ssp = - DefaultSearchScoreProvider.exact(q, fieldEntryMap.get(field).similarityFunction, view); + ssp = DefaultSearchScoreProvider.exact(q, fieldEntry.similarityFunction, view); } - final GraphNodeIdToDocMap jvectorLuceneDocMap = fieldEntryMap.get(field).graphNodeIdToDocMap; + final GraphNodeIdToDocMap jvectorLuceneDocMap = fieldEntry.graphNodeIdToDocMap; // Convert the acceptDocs bitmap from Lucene to jVector ordinal bitmap filter // Logic works as follows: if acceptDocs is null, we accept all ordinals. 
Otherwise, we check // if the jVector ordinal has a From d5b6d4fff579f28667eaf1feddfada9e48eeaf55 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Thu, 30 Oct 2025 20:55:09 +0000 Subject: [PATCH 32/86] Do not write empty graph --- .../sandbox/codecs/jvector/JVectorReader.java | 73 ++++++++++++++----- .../sandbox/codecs/jvector/JVectorWriter.java | 23 +++++- 2 files changed, 76 insertions(+), 20 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 45dad566f626..24ebdbc2dcf7 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -42,6 +42,7 @@ import org.apache.lucene.index.*; import org.apache.lucene.search.AcceptDocs; import org.apache.lucene.search.KnnCollector; +import org.apache.lucene.search.VectorScorer; import org.apache.lucene.search.knn.KnnSearchStrategy; import org.apache.lucene.store.*; import org.apache.lucene.util.IOUtils; @@ -107,6 +108,35 @@ public void checkIntegrity() throws IOException { @Override public FloatVectorValues getFloatVectorValues(String field) throws IOException { final FieldEntry fieldEntry = fieldEntryMap.get(field); + if (fieldEntry == null || fieldEntry.index == null) { + return new FloatVectorValues() { + @Override + public float[] vectorValue(int ord) throws IOException { + throw new IndexOutOfBoundsException(); + } + + @Override + public FloatVectorValues copy() throws IOException { + return this; + } + + @Override + public int dimension() { + return fieldEntry.vectorDimension; + } + + @Override + public int size() { + return 0; + } + + @Override + public VectorScorer scorer(float[] target) throws IOException { + return null; + } + }; + } + return new JVectorFloatVectorValues( fieldEntry.index, fieldEntry.similarityFunction, fieldEntry.graphNodeIdToDocMap); } @@ -132,8 +162,9 @@ public RandomAccessReader getNeighborsScoreCacheForField(String field) throws IO return fieldEntry.neighborsScoreCacheIndexReaderSupplier.get(); } - public OnDiskGraphIndex getOnDiskGraphIndex(String field) throws IOException { - return fieldEntryMap.get(field).index; + public boolean hasIndex(String field) { + final var fieldEntry = fieldEntryMap.get(field); + return fieldEntry != null && fieldEntry.index != null; } @Override @@ -141,6 +172,10 @@ public void search(String field, float[] target, KnnCollector knnCollector, Acce throws IOException { final var fieldEntry = fieldEntryMap.get(field); final OnDiskGraphIndex index = fieldEntry.index; + if (index == null) { + // Skip search when the graph is empty + return; + } final JVectorSearchStrategy searchStrategy; if (knnCollector.getSearchStrategy() instanceof JVectorSearchStrategy strategy) { @@ -155,10 +190,6 @@ public void search(String field, float[] target, KnnCollector knnCollector, Acce final SearchScoreProvider ssp; try (var view = index.getView()) { - if (view.entryNode() == null) { - // Skip search when the graph is empty - return; - } if (fieldEntry.pqVectors != null) { // Quantized, use the precomputed score function final PQVectors pqVectors = fieldEntry.pqVectors; // SearchScoreProvider that does a first pass with the loaded-in-memory PQVectors, @@ -228,6 +259,7 @@ private void readFields(ChecksumIndexInput meta) throws IOException { class FieldEntry implements Closeable { private final VectorSimilarityFunction similarityFunction; + 
private final int vectorDimension; private final long vectorIndexOffset; private final long vectorIndexLength; private final long pqCodebooksAndVectorsLength; @@ -247,6 +279,7 @@ public FieldEntry( this.similarityFunction = VectorSimilarityMapper.ordToDistFunc( vectorIndexFieldMetadata.vectorSimilarityFunction.ordinal()); + this.vectorDimension = vectorIndexFieldMetadata.vectorDimension; this.vectorIndexOffset = vectorIndexFieldMetadata.vectorIndexOffset; this.vectorIndexLength = vectorIndexFieldMetadata.vectorIndexLength; this.pqCodebooksAndVectorsLength = vectorIndexFieldMetadata.pqCodebooksAndVectorsLength; @@ -262,18 +295,22 @@ public FieldEntry( + "." + JVectorFormat.NEIGHBORS_SCORE_CACHE_EXTENSION; - // For the slice we would like to include the Lucene header, unfortunately, we have to do this - // because jVector use global - // offsets instead of local offsets - final long sliceLength = - vectorIndexLength - + CodecUtil.indexHeaderLength( - JVectorFormat.VECTOR_INDEX_CODEC_NAME, state.segmentSuffix); - // Load the graph index - this.indexReaderSupplier = - new JVectorRandomAccessReader.Supplier( - directory.openInput(vectorIndexFieldDataFileName, state.context), 0, sliceLength); - this.index = OnDiskGraphIndex.load(indexReaderSupplier, vectorIndexOffset); + if (vectorIndexLength != 0) { + // For the slice we would like to include the Lucene header, unfortunately, we have to do + // this because jVector use global offsets instead of local offsets + final long sliceLength = + vectorIndexLength + + CodecUtil.indexHeaderLength( + JVectorFormat.VECTOR_INDEX_CODEC_NAME, state.segmentSuffix); + // Load the graph index + this.indexReaderSupplier = + new JVectorRandomAccessReader.Supplier( + directory.openInput(vectorIndexFieldDataFileName, state.context), 0, sliceLength); + this.index = OnDiskGraphIndex.load(indexReaderSupplier, vectorIndexOffset); + } else { + this.indexReaderSupplier = null; + this.index = null; + } // If quantized load the compressed product quantized vectors with their codebooks if (pqCodebooksAndVectorsLength > 0) { diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index f7351c1832cf..a384937248c1 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -285,7 +285,9 @@ private void writeField( JVectorFormat.VERSION_CURRENT, segmentWriteState.segmentInfo.getId(), segmentWriteState.segmentSuffix); - graph.save(jVectorIndexWriter); + if (graph.entryNode() != null) { + graph.save(jVectorIndexWriter); + } CodecUtil.writeFooter(indexOutput); } } @@ -323,6 +325,21 @@ private VectorIndexFieldMetadata writeGraph( segmentWriteState.segmentInfo.getId(), segmentWriteState.segmentSuffix); final long startOffset = indexOutput.getFilePointer(); + if (graph.size() == 0) { + CodecUtil.writeFooter(indexOutput); + return new VectorIndexFieldMetadata( + fieldInfo.number, + fieldInfo.getVectorEncoding(), + fieldInfo.getVectorSimilarityFunction(), + randomAccessVectorValues.dimension(), + 0, + 0, + 0, + 0, + degreeOverflow, + graphNodeIdToDocMap + ); + } try (var writer = new OnDiskSequentialGraphIndexWriter.Builder(graph, jVectorIndexWriter) .with(new InlineVectors(randomAccessVectorValues.dimension())) @@ -896,7 +913,9 @@ public void merge() throws IOException { this, graphNodeIdsToRavvOrds, 
getVectorSimilarityFunction(fieldInfo)); // graph = getGraph(buildScoreProvider, this, newToOldOrds, fieldInfo, // segmentWriteState.segmentInfo.name); - if (!deletesFound) { + if (!deletesFound + && leadingReader instanceof JVectorReader reader + && reader.hasIndex(fieldName)) { // Expand graph when there are no deletes and no PQ codebooks final RandomAccessReader leadingOnHeapGraphReader = leadingReader.getNeighborsScoreCacheForField(fieldName); From 7726aa0455331fcca7065bad3eb73122837dd886 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Thu, 30 Oct 2025 21:59:38 +0000 Subject: [PATCH 33/86] Fix merging other formats --- .../sandbox/codecs/jvector/JVectorWriter.java | 81 ++++++++++--------- 1 file changed, 43 insertions(+), 38 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index a384937248c1..0d714ae39c46 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -35,6 +35,7 @@ import io.github.jbellis.jvector.vector.types.VectorFloat; import io.github.jbellis.jvector.vector.types.VectorTypeSupport; import java.io.IOException; +import java.io.UncheckedIOException; import java.io.UnsupportedEncodingException; import java.util.*; import java.util.concurrent.ForkJoinPool; @@ -84,6 +85,8 @@ * with {@link GraphNodeIdToDocMap#update(Sorter.DocMap)} during flushes. */ public class JVectorWriter extends KnnVectorsWriter { + private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = + VectorizationProvider.getInstance().getVectorTypeSupport(); private static final long SHALLOW_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(JVectorWriter.class); @@ -328,17 +331,16 @@ private VectorIndexFieldMetadata writeGraph( if (graph.size() == 0) { CodecUtil.writeFooter(indexOutput); return new VectorIndexFieldMetadata( - fieldInfo.number, - fieldInfo.getVectorEncoding(), - fieldInfo.getVectorSimilarityFunction(), - randomAccessVectorValues.dimension(), - 0, - 0, - 0, - 0, - degreeOverflow, - graphNodeIdToDocMap - ); + fieldInfo.number, + fieldInfo.getVectorEncoding(), + fieldInfo.getVectorSimilarityFunction(), + randomAccessVectorValues.dimension(), + 0, + 0, + 0, + 0, + degreeOverflow, + graphNodeIdToDocMap); } try (var writer = new OnDiskSequentialGraphIndexWriter.Builder(graph, jVectorIndexWriter) @@ -516,8 +518,6 @@ public long ramBytesUsed() { * support specific implementations, such as float[] or byte[] vectors. */ static class FieldWriter extends KnnFieldVectorsWriter { - private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = - VectorizationProvider.getInstance().getVectorTypeSupport(); private final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(FieldWriter.class); private final FieldInfo fieldInfo; private int lastDocID = -1; @@ -605,7 +605,7 @@ class RandomAccessMergedFloatVectorValues implements RandomAccessVectorValues { // Array of sub-readers private final KnnVectorsReader[] readers; - private final JVectorFloatVectorValues[] perReaderFloatVectorValues; + private final FloatVectorValues[] perReaderFloatVectorValues; // Maps the ravv ordinals to the reader index and the ordinal in that reader. 
This is allowing // us to get a unified view of all the @@ -662,7 +662,7 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge FieldInfos fieldInfos = mergeState.fieldInfos[i]; baseOrds[i] = totalVectorsCount; if (MergedVectorValues.hasVectorValues(fieldInfos, fieldName)) { - KnnVectorsReader reader = mergeState.knnVectorsReaders[i]; + KnnVectorsReader reader = mergeState.knnVectorsReaders[i].unwrapReaderForField(fieldName); if (reader != null) { FloatVectorValues values = reader.getFloatVectorValues(fieldName); if (values != null) { @@ -678,7 +678,8 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge deletesFound = true; } } - if (liveVectorCountInReader >= vectorsCountInLeadingReader) { + if (reader instanceof JVectorReader + && liveVectorCountInReader >= vectorsCountInLeadingReader) { vectorsCountInLeadingReader = liveVectorCountInReader; tempLeadingReaderIdx = i; } @@ -706,7 +707,7 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge // For this part we need to make sure we also swap all the other metadata arrays that are // indexed by reader index // Such as readers, docMaps, liveDocs, baseOrds, deletedOrds - if (tempLeadingReaderIdx != 0) { + if (tempLeadingReaderIdx > 0) { final KnnVectorsReader temp = readers[LEADING_READER_IDX]; readers[LEADING_READER_IDX] = readers[tempLeadingReaderIdx]; readers[tempLeadingReaderIdx] = temp; @@ -720,7 +721,7 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge baseOrds[tempLeadingReaderIdx] = tempBaseOrd; } - this.perReaderFloatVectorValues = new JVectorFloatVectorValues[readers.length]; + this.perReaderFloatVectorValues = new FloatVectorValues[readers.length]; this.dimension = dimension; // Build mapping from global ordinal to [readerIndex, readerOrd] @@ -743,8 +744,7 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge // TODO: remove this logic once we support incremental graph building with deletes see // https://github.com/opensearch-project/opensearch-jvector/issues/171 for (int readerIdx = 0; readerIdx < readers.length; readerIdx++) { - final JVectorFloatVectorValues values = - (JVectorFloatVectorValues) readers[readerIdx].getFloatVectorValues(fieldName); + final FloatVectorValues values = readers[readerIdx].getFloatVectorValues(fieldName); perReaderFloatVectorValues[readerIdx] = values; // For each vector in this reader KnnVectorValues.DocIndexIterator it = values.iterator(); @@ -778,8 +778,8 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge // This is necessary because we are later going to expand that graph with new vectors from // the other readers. 
// The leading reader is ALWAYS the first one in the readers array - final JVectorFloatVectorValues leadingReaderValues = - (JVectorFloatVectorValues) readers[LEADING_READER_IDX].getFloatVectorValues(fieldName); + final FloatVectorValues leadingReaderValues = + readers[LEADING_READER_IDX].getFloatVectorValues(fieldName); perReaderFloatVectorValues[LEADING_READER_IDX] = leadingReaderValues; var leadingReaderIt = leadingReaderValues.iterator(); for (int docId = leadingReaderIt.nextDoc(); @@ -802,8 +802,7 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge // For the remaining readers we map the graph node id to the ravv ordinal in the order they // appear for (int readerIdx = 1; readerIdx < readers.length; readerIdx++) { - final JVectorFloatVectorValues values = - (JVectorFloatVectorValues) readers[readerIdx].getFloatVectorValues(fieldName); + final FloatVectorValues values = readers[readerIdx].getFloatVectorValues(fieldName); perReaderFloatVectorValues[readerIdx] = values; // For each vector in this reader KnnVectorValues.DocIndexIterator it = values.iterator(); @@ -873,21 +872,14 @@ public void merge() throws IOException { final PQVectors pqVectors; final OnHeapGraphIndex graph; // Get the leading reader - final JVectorReader leadingReader = - (JVectorReader) readers[LEADING_READER_IDX].unwrapReaderForField(fieldName); + final var leadingReader = readers[LEADING_READER_IDX].unwrapReaderForField(fieldName); final BuildScoreProvider buildScoreProvider; // Check if the leading reader has pre-existing PQ codebooks and if so, refine them with the // remaining vectors - if (leadingReader.getProductQuantizationForField(fieldInfo.name).isEmpty()) { - // No pre-existing codebooks, check if we have enough vectors to trigger quantization - if (this.size() >= minimumBatchSizeForQuantization) { - pqVectors = getPQVectors(graphNodeIdsToRavvOrds, this, fieldInfo); - } else { - pqVectors = null; - } - } else { - ProductQuantization leadingCompressor = - leadingReader.getProductQuantizationForField(fieldName).get(); + if (leadingReader instanceof JVectorReader reader + && reader.getProductQuantizationForField(fieldName).isPresent()) { + final ProductQuantization leadingCompressor = + reader.getProductQuantizationForField(fieldName).get(); // Refine the leadingCompressor with the remaining vectors in the merge, we skip the leading // reader since it's already been // used to create the leadingCompressor @@ -905,6 +897,11 @@ public void merge() throws IOException { graphNodeIdsToRavvOrds, this, SIMD_POOL_MERGE); + } else if (this.size() >= minimumBatchSizeForQuantization) { + // No pre-existing codebooks, check if we have enough vectors to trigger quantization + pqVectors = getPQVectors(graphNodeIdsToRavvOrds, this, fieldInfo); + } else { + pqVectors = null; } if (pqVectors == null) { @@ -918,7 +915,7 @@ public void merge() throws IOException { && reader.hasIndex(fieldName)) { // Expand graph when there are no deletes and no PQ codebooks final RandomAccessReader leadingOnHeapGraphReader = - leadingReader.getNeighborsScoreCacheForField(fieldName); + reader.getNeighborsScoreCacheForField(fieldName); final int numBaseVectors = leadingReader.getFloatVectorValues(fieldName).size(); graph = (OnHeapGraphIndex) @@ -985,7 +982,15 @@ public VectorFloat getVector(int ord) { // Access to float values is not thread safe synchronized (perReaderFloatVectorValues[readerIdx]) { - return perReaderFloatVectorValues[readerIdx].vectorFloatValue(readerOrd); + if 
(perReaderFloatVectorValues[readerIdx] instanceof JVectorFloatVectorValues values) { + return values.vectorFloatValue(readerOrd); + } + try { + return VECTOR_TYPE_SUPPORT.createFloatVector( + perReaderFloatVectorValues[readerIdx].vectorValue(readerOrd)); + } catch (IOException e) { + throw new UncheckedIOException(e); + } } } From 6cb01839b8424027d1b9b1aa3781af8390772f08 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Sat, 1 Nov 2025 17:17:09 +0000 Subject: [PATCH 34/86] Remove incremental graph build --- .../sandbox/codecs/jvector/JVectorFormat.java | 4 - .../sandbox/codecs/jvector/JVectorReader.java | 31 --- .../sandbox/codecs/jvector/JVectorWriter.java | 196 +++--------------- 3 files changed, 33 insertions(+), 198 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java index ef61fda380a6..af727760c375 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java @@ -32,13 +32,9 @@ public class JVectorFormat extends KnnVectorsFormat { public static final String NAME = "JVectorFormat"; public static final String META_CODEC_NAME = "JVectorVectorsFormatMeta"; public static final String VECTOR_INDEX_CODEC_NAME = "JVectorVectorsFormatIndex"; - public static final String NEIGHBORS_SCORE_CACHE_CODEC_NAME = - "JVectorVectorsFormatNeighborsScoreCache"; public static final String JVECTOR_FILES_SUFFIX = "jvector"; public static final String META_EXTENSION = "meta-" + JVECTOR_FILES_SUFFIX; public static final String VECTOR_INDEX_EXTENSION = "data-" + JVECTOR_FILES_SUFFIX; - public static final String NEIGHBORS_SCORE_CACHE_EXTENSION = - "neighbors-score-cache-" + JVECTOR_FILES_SUFFIX; public static final int VERSION_START = 0; public static final int VERSION_CURRENT = VERSION_START; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 24ebdbc2dcf7..a7ca23e98132 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -17,7 +17,6 @@ package org.apache.lucene.sandbox.codecs.jvector; -import io.github.jbellis.jvector.disk.RandomAccessReader; import io.github.jbellis.jvector.disk.ReaderSupplier; import io.github.jbellis.jvector.graph.GraphSearcher; import io.github.jbellis.jvector.graph.SearchResult; @@ -95,13 +94,6 @@ public void checkIntegrity() throws IOException { state.directory.openInput(fieldEntry.vectorIndexFieldDataFileName, IOContext.READONCE)) { CodecUtil.checksumEntireFile(indexInput); } - - // Verify the neighbors score cache file - try (var indexInput = - state.directory.openInput( - fieldEntry.neighborsScoreCacheIndexFieldFileName, IOContext.READONCE)) { - CodecUtil.checksumEntireFile(indexInput); - } } } @@ -157,11 +149,6 @@ public Optional getProductQuantizationForField(String field return Optional.of(fieldEntry.pqVectors.getCompressor()); } - public RandomAccessReader getNeighborsScoreCacheForField(String field) throws IOException { - final FieldEntry fieldEntry = fieldEntryMap.get(field); - return fieldEntry.neighborsScoreCacheIndexReaderSupplier.get(); - } - public boolean hasIndex(String field) { final var fieldEntry = fieldEntryMap.get(field); return 
fieldEntry != null && fieldEntry.index != null; @@ -265,11 +252,9 @@ class FieldEntry implements Closeable { private final long pqCodebooksAndVectorsLength; private final long pqCodebooksAndVectorsOffset; private final String vectorIndexFieldDataFileName; - private final String neighborsScoreCacheIndexFieldFileName; private final GraphNodeIdToDocMap graphNodeIdToDocMap; private final ReaderSupplier indexReaderSupplier; private final ReaderSupplier pqCodebooksReaderSupplier; - private final ReaderSupplier neighborsScoreCacheIndexReaderSupplier; private final OnDiskGraphIndex index; private final PQVectors pqVectors; // The product quantized vectors with their codebooks @@ -288,12 +273,6 @@ public FieldEntry( this.vectorIndexFieldDataFileName = baseDataFileName + "_" + fieldInfo.name + "." + JVectorFormat.VECTOR_INDEX_EXTENSION; - this.neighborsScoreCacheIndexFieldFileName = - baseDataFileName - + "_" - + fieldInfo.name - + "." - + JVectorFormat.NEIGHBORS_SCORE_CACHE_EXTENSION; if (vectorIndexLength != 0) { // For the slice we would like to include the Lucene header, unfortunately, we have to do @@ -331,13 +310,6 @@ public FieldEntry( this.pqCodebooksReaderSupplier = null; this.pqVectors = null; } - - final IndexInput indexInput = - directory.openInput(neighborsScoreCacheIndexFieldFileName, state.context); - CodecUtil.readIndexHeader(indexInput); - - this.neighborsScoreCacheIndexReaderSupplier = - new JVectorRandomAccessReader.Supplier(indexInput); } @Override @@ -348,9 +320,6 @@ public void close() throws IOException { if (pqCodebooksReaderSupplier != null) { IOUtils.close(pqCodebooksReaderSupplier::close); } - if (neighborsScoreCacheIndexReaderSupplier != null) { - IOUtils.close(neighborsScoreCacheIndexReaderSupplier::close); - } } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 0d714ae39c46..bcc3b66d9ae0 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -22,7 +22,6 @@ import static org.apache.lucene.sandbox.codecs.jvector.JVectorFormat.SIMD_POOL_FLUSH; import static org.apache.lucene.sandbox.codecs.jvector.JVectorFormat.SIMD_POOL_MERGE; -import io.github.jbellis.jvector.disk.RandomAccessReader; import io.github.jbellis.jvector.graph.*; import io.github.jbellis.jvector.graph.disk.*; import io.github.jbellis.jvector.graph.disk.feature.Feature; @@ -270,29 +269,6 @@ private void writeField( graphNodeIdToDocMap); meta.writeInt(fieldInfo.number); vectorIndexFieldMetadata.toOutput(meta); - - // field data file, which contains the graph - final String neighborsScoreCacheIndexFieldFileName = - baseDataFileName - + "_" - + fieldInfo.name - + "." 
- + JVectorFormat.NEIGHBORS_SCORE_CACHE_EXTENSION; - try (IndexOutput indexOutput = - segmentWriteState.directory.createOutput( - neighborsScoreCacheIndexFieldFileName, segmentWriteState.context); - final var jVectorIndexWriter = new JVectorIndexWriter(indexOutput)) { - CodecUtil.writeIndexHeader( - indexOutput, - JVectorFormat.NEIGHBORS_SCORE_CACHE_CODEC_NAME, - JVectorFormat.VERSION_CURRENT, - segmentWriteState.segmentInfo.getId(), - segmentWriteState.segmentSuffix); - if (graph.entryNode() != null) { - graph.save(jVectorIndexWriter); - } - CodecUtil.writeFooter(indexOutput); - } } /** @@ -622,7 +598,6 @@ class RandomAccessMergedFloatVectorValues implements RandomAccessVectorValues { private final FieldInfo fieldInfo; private final GraphNodeIdToDocMap graphNodeIdToDocMap; private final int[] graphNodeIdsToRavvOrds; - private boolean deletesFound = false; /** * Creates a random access view over merged float vector values. @@ -649,12 +624,6 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge final MergeState.DocMap[] docMaps = mergeState.docMaps.clone(); final Bits[] liveDocs = mergeState.liveDocs.clone(); final int[] baseOrds = new int[mergeState.knnVectorsReaders.length]; - final int[] deletedOrds = - new int - [mergeState - .knnVectorsReaders - .length]; // counts the number of deleted documents in each reader - // that previously had a vector // Find the leading reader, count the total number of live vectors, and the base ordinals for // each reader @@ -673,9 +642,6 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { if (liveDocs[i] == null || liveDocs[i].get(it.docID())) { liveVectorCountInReader++; - } else { - deletedOrds[i]++; - deletesFound = true; } } if (reader instanceof JVectorReader @@ -738,95 +704,32 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge this.graphNodeIdsToRavvOrds = new int[totalLiveVectorsCount]; int graphNodeId = 0; - if (deletesFound) { - // If there are deletes, we need to build a new graph from scratch and compact the graph - // node ids - // TODO: remove this logic once we support incremental graph building with deletes see - // https://github.com/opensearch-project/opensearch-jvector/issues/171 - for (int readerIdx = 0; readerIdx < readers.length; readerIdx++) { - final FloatVectorValues values = readers[readerIdx].getFloatVectorValues(fieldName); - perReaderFloatVectorValues[readerIdx] = values; - // For each vector in this reader - KnnVectorValues.DocIndexIterator it = values.iterator(); - - for (int docId = it.nextDoc(); - docId != DocIdSetIterator.NO_MORE_DOCS; - docId = it.nextDoc()) { - if (docMaps[readerIdx].get(docId) != -1) { - // Mapping from ravv ordinals to [readerIndex, readerOrd] - // Map graph node id to ravv ordinal - // Map graph node id to doc id - final int newGlobalDocId = docMaps[readerIdx].get(docId); - final int ravvLocalOrd = it.index(); - final int ravvGlobalOrd = ravvLocalOrd + baseOrds[readerIdx]; - graphNodeIdToDocIds[graphNodeId] = newGlobalDocId; - graphNodeIdsToRavvOrds[graphNodeId] = ravvGlobalOrd; - graphNodeId++; - ravvOrdToReaderMapping[ravvGlobalOrd][READER_ID] = readerIdx; // Reader index - ravvOrdToReaderMapping[ravvGlobalOrd][READER_ORD] = ravvLocalOrd; // Ordinal in reader - } - - documentsIterated++; - } - } - } else { - // If there are no deletes, we can reuse the existing graph and simply remap the ravv - // ordinals to the new global doc ids - // for the 
leading reader we must preserve the original node Ids and map them to the - // corresponding ravv vectors originally - // used to build the graph - // This is necessary because we are later going to expand that graph with new vectors from - // the other readers. - // The leading reader is ALWAYS the first one in the readers array - final FloatVectorValues leadingReaderValues = - readers[LEADING_READER_IDX].getFloatVectorValues(fieldName); - perReaderFloatVectorValues[LEADING_READER_IDX] = leadingReaderValues; - var leadingReaderIt = leadingReaderValues.iterator(); - for (int docId = leadingReaderIt.nextDoc(); + // Build a new graph from scratch and compact the graph node ids + for (int readerIdx = 0; readerIdx < readers.length; readerIdx++) { + final FloatVectorValues values = readers[readerIdx].getFloatVectorValues(fieldName); + perReaderFloatVectorValues[readerIdx] = values; + // For each vector in this reader + KnnVectorValues.DocIndexIterator it = values.iterator(); + + for (int docId = it.nextDoc(); docId != DocIdSetIterator.NO_MORE_DOCS; - docId = leadingReaderIt.nextDoc()) { - final int newGlobalDocId = docMaps[LEADING_READER_IDX].get(docId); - if (newGlobalDocId != -1) { - final int ravvLocalOrd = leadingReaderIt.index(); - final int ravvGlobalOrd = ravvLocalOrd + baseOrds[LEADING_READER_IDX]; - graphNodeIdToDocIds[ravvLocalOrd] = newGlobalDocId; - graphNodeIdsToRavvOrds[ravvLocalOrd] = ravvGlobalOrd; + docId = it.nextDoc()) { + if (docMaps[readerIdx].get(docId) != -1) { + // Mapping from ravv ordinals to [readerIndex, readerOrd] + // Map graph node id to ravv ordinal + // Map graph node id to doc id + final int newGlobalDocId = docMaps[readerIdx].get(docId); + final int ravvLocalOrd = it.index(); + final int ravvGlobalOrd = ravvLocalOrd + baseOrds[readerIdx]; + graphNodeIdToDocIds[graphNodeId] = newGlobalDocId; + graphNodeIdsToRavvOrds[graphNodeId] = ravvGlobalOrd; graphNodeId++; - ravvOrdToReaderMapping[ravvGlobalOrd][READER_ID] = LEADING_READER_IDX; // Reader index + ravvOrdToReaderMapping[ravvGlobalOrd][READER_ID] = readerIdx; // Reader index ravvOrdToReaderMapping[ravvGlobalOrd][READER_ORD] = ravvLocalOrd; // Ordinal in reader } documentsIterated++; } - - // For the remaining readers we map the graph node id to the ravv ordinal in the order they - // appear - for (int readerIdx = 1; readerIdx < readers.length; readerIdx++) { - final FloatVectorValues values = readers[readerIdx].getFloatVectorValues(fieldName); - perReaderFloatVectorValues[readerIdx] = values; - // For each vector in this reader - KnnVectorValues.DocIndexIterator it = values.iterator(); - - for (int docId = it.nextDoc(); - docId != DocIdSetIterator.NO_MORE_DOCS; - docId = it.nextDoc()) { - if (docMaps[readerIdx].get(docId) != -1) { - // Mapping from ravv ordinals to [readerIndex, readerOrd] - // Map graph node id to ravv ordinal - // Map graph node id to doc id - final int newGlobalDocId = docMaps[readerIdx].get(docId); - final int ravvLocalOrd = it.index(); - final int ravvGlobalOrd = ravvLocalOrd + baseOrds[readerIdx]; - graphNodeIdToDocIds[graphNodeId] = newGlobalDocId; - graphNodeIdsToRavvOrds[graphNodeId] = ravvGlobalOrd; - graphNodeId++; - ravvOrdToReaderMapping[ravvGlobalOrd][READER_ID] = readerIdx; // Reader index - ravvOrdToReaderMapping[ravvGlobalOrd][READER_ORD] = ravvLocalOrd; // Ordinal in reader - } - - documentsIterated++; - } - } } if (documentsIterated < totalVectorsCount) { @@ -870,10 +773,8 @@ public void merge() throws IOException { // Get PQ compressor for leading reader final String 
fieldName = fieldInfo.name; final PQVectors pqVectors; - final OnHeapGraphIndex graph; // Get the leading reader final var leadingReader = readers[LEADING_READER_IDX].unwrapReaderForField(fieldName); - final BuildScoreProvider buildScoreProvider; // Check if the leading reader has pre-existing PQ codebooks and if so, refine them with the // remaining vectors if (leadingReader instanceof JVectorReader reader @@ -904,43 +805,8 @@ public void merge() throws IOException { pqVectors = null; } - if (pqVectors == null) { - buildScoreProvider = - BuildScoreProvider.randomAccessScoreProvider( - this, graphNodeIdsToRavvOrds, getVectorSimilarityFunction(fieldInfo)); - // graph = getGraph(buildScoreProvider, this, newToOldOrds, fieldInfo, - // segmentWriteState.segmentInfo.name); - if (!deletesFound - && leadingReader instanceof JVectorReader reader - && reader.hasIndex(fieldName)) { - // Expand graph when there are no deletes and no PQ codebooks - final RandomAccessReader leadingOnHeapGraphReader = - reader.getNeighborsScoreCacheForField(fieldName); - final int numBaseVectors = leadingReader.getFloatVectorValues(fieldName).size(); - graph = - (OnHeapGraphIndex) - GraphIndexBuilder.buildAndMergeNewNodes( - leadingOnHeapGraphReader, - this, - buildScoreProvider, - numBaseVectors, - graphNodeIdsToRavvOrds, - beamWidth, - degreeOverflow, - alpha, - hierarchyEnabled); - } else { - // Build a new graph from scratch when there are deletes and no PQ codebooks - graph = - getGraph( - buildScoreProvider, - this, - graphNodeIdsToRavvOrds, - fieldInfo, - segmentWriteState.segmentInfo.name, - SIMD_POOL_MERGE); - } - } else { + final BuildScoreProvider buildScoreProvider; + if (pqVectors != null) { // Re-use PQ codebooks to build a new graph from scratch buildScoreProvider = BuildScoreProvider.pqBuildScoreProvider( @@ -948,15 +814,19 @@ public void merge() throws IOException { // Pre-init the diversity provider here to avoid doing it lazily (as it could block the SIMD // threads) buildScoreProvider.diversityProviderFor(0); - graph = - getGraph( - buildScoreProvider, - this, - graphNodeIdsToRavvOrds, - fieldInfo, - segmentWriteState.segmentInfo.name, - SIMD_POOL_MERGE); + } else { + buildScoreProvider = + BuildScoreProvider.randomAccessScoreProvider( + this, graphNodeIdsToRavvOrds, getVectorSimilarityFunction(fieldInfo)); } + final OnHeapGraphIndex graph = + getGraph( + buildScoreProvider, + this, + graphNodeIdsToRavvOrds, + fieldInfo, + segmentWriteState.segmentInfo.name, + SIMD_POOL_MERGE); writeField(fieldInfo, this, pqVectors, graphNodeIdsToRavvOrds, graphNodeIdToDocMap, graph); } From 47080f5c7af4f5b644d3cd50879fa3ebc8a9eea0 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Sat, 1 Nov 2025 17:35:11 +0000 Subject: [PATCH 35/86] Fix leading readers indexing error --- .../lucene/sandbox/codecs/jvector/JVectorWriter.java | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index bcc3b66d9ae0..93762a726db1 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -620,7 +620,7 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge int dimension = 0; int tempLeadingReaderIdx = -1; int vectorsCountInLeadingReader = -1; - List allReaders = new ArrayList<>(); + this.readers 
= mergeState.knnVectorsReaders.clone(); final MergeState.DocMap[] docMaps = mergeState.docMaps.clone(); final Bits[] liveDocs = mergeState.liveDocs.clone(); final int[] baseOrds = new int[mergeState.knnVectorsReaders.length]; @@ -635,7 +635,6 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge if (reader != null) { FloatVectorValues values = reader.getFloatVectorValues(fieldName); if (values != null) { - allReaders.add(reader); int vectorCountInReader = values.size(); int liveVectorCountInReader = 0; KnnVectorValues.DocIndexIterator it = values.iterator(); @@ -664,10 +663,6 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge assert (dimension > 0) : "No vectors found for field " + fieldName; this.size = totalVectorsCount; - this.readers = new KnnVectorsReader[allReaders.size()]; - for (int i = 0; i < readers.length; i++) { - readers[i] = allReaders.get(i); - } // always swap the leading reader to the first position // For this part we need to make sure we also swap all the other metadata arrays that are From 34f0685d2ee198eb9f74a959087bfded53650809 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Sat, 1 Nov 2025 18:12:48 +0000 Subject: [PATCH 36/86] Fix PQ refinement --- .../sandbox/codecs/jvector/JVectorWriter.java | 58 +++++++------------ 1 file changed, 22 insertions(+), 36 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 93762a726db1..3f73810c9469 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -577,7 +577,6 @@ static io.github.jbellis.jvector.vector.VectorSimilarityFunction getVectorSimila class RandomAccessMergedFloatVectorValues implements RandomAccessVectorValues { private static final int READER_ID = 0; private static final int READER_ORD = 1; - private static final int LEADING_READER_IDX = 0; // Array of sub-readers private final KnnVectorsReader[] readers; @@ -598,6 +597,8 @@ class RandomAccessMergedFloatVectorValues implements RandomAccessVectorValues { private final FieldInfo fieldInfo; private final GraphNodeIdToDocMap graphNodeIdToDocMap; private final int[] graphNodeIdsToRavvOrds; + private final int pqReaderIndex; + private final ProductQuantization pq; /** * Creates a random access view over merged float vector values. 
@@ -618,7 +619,8 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge int totalVectorsCount = 0; int totalLiveVectorsCount = 0; int dimension = 0; - int tempLeadingReaderIdx = -1; + int pqReaderIndex = -1; + ProductQuantization pq = null; int vectorsCountInLeadingReader = -1; this.readers = mergeState.knnVectorsReaders.clone(); final MergeState.DocMap[] docMaps = mergeState.docMaps.clone(); @@ -643,10 +645,14 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge liveVectorCountInReader++; } } - if (reader instanceof JVectorReader + if (reader instanceof JVectorReader jVectorReader && liveVectorCountInReader >= vectorsCountInLeadingReader) { vectorsCountInLeadingReader = liveVectorCountInReader; - tempLeadingReaderIdx = i; + final var maybeNewPq = jVectorReader.getProductQuantizationForField(fieldName); + if (maybeNewPq.isPresent()) { + pqReaderIndex = i; + pq = maybeNewPq.get(); + } } totalVectorsCount += vectorCountInReader; totalLiveVectorsCount += liveVectorCountInReader; @@ -662,26 +668,9 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge : "Total number of live vectors exceeds the total number of vectors"; assert (dimension > 0) : "No vectors found for field " + fieldName; + this.pq = pq; + this.pqReaderIndex = pqReaderIndex; this.size = totalVectorsCount; - - // always swap the leading reader to the first position - // For this part we need to make sure we also swap all the other metadata arrays that are - // indexed by reader index - // Such as readers, docMaps, liveDocs, baseOrds, deletedOrds - if (tempLeadingReaderIdx > 0) { - final KnnVectorsReader temp = readers[LEADING_READER_IDX]; - readers[LEADING_READER_IDX] = readers[tempLeadingReaderIdx]; - readers[tempLeadingReaderIdx] = temp; - // also swap the leading doc map to the first position to match the readers - final MergeState.DocMap tempDocMap = docMaps[LEADING_READER_IDX]; - docMaps[LEADING_READER_IDX] = docMaps[tempLeadingReaderIdx]; - docMaps[tempLeadingReaderIdx] = tempDocMap; - // swap base ords - final int tempBaseOrd = baseOrds[LEADING_READER_IDX]; - baseOrds[LEADING_READER_IDX] = baseOrds[tempLeadingReaderIdx]; - baseOrds[tempLeadingReaderIdx] = tempBaseOrd; - } - this.perReaderFloatVectorValues = new FloatVectorValues[readers.length]; this.dimension = dimension; @@ -768,27 +757,24 @@ public void merge() throws IOException { // Get PQ compressor for leading reader final String fieldName = fieldInfo.name; final PQVectors pqVectors; - // Get the leading reader - final var leadingReader = readers[LEADING_READER_IDX].unwrapReaderForField(fieldName); // Check if the leading reader has pre-existing PQ codebooks and if so, refine them with the // remaining vectors - if (leadingReader instanceof JVectorReader reader - && reader.getProductQuantizationForField(fieldName).isPresent()) { - final ProductQuantization leadingCompressor = - reader.getProductQuantizationForField(fieldName).get(); - // Refine the leadingCompressor with the remaining vectors in the merge, we skip the leading - // reader since it's already been - // used to create the leadingCompressor - // We assume the leading reader is ALWAYS the first one in the readers array - for (int i = LEADING_READER_IDX + 1; i < readers.length; i++) { + if (pq != null) { + // Refine the leadingCompressor with the remaining vectors in the merge + ProductQuantization newPq = pq; + for (int i = 0; i < readers.length; i++) { + if (i == pqReaderIndex) { + // Skip the reader associated with 
the re-used PQ codebook + continue; + } final FloatVectorValues values = readers[i].getFloatVectorValues(fieldName); final RandomAccessVectorValues randomAccessVectorValues = new RandomAccessVectorValuesOverVectorValues(values); - leadingCompressor.refine(randomAccessVectorValues); + newPq = newPq.refine(randomAccessVectorValues); } pqVectors = PQVectors.encodeAndBuild( - leadingCompressor, + newPq, graphNodeIdsToRavvOrds.length, graphNodeIdsToRavvOrds, this, From 31b973e4d3c32dc824b24384a5ef2ed616698770 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Mon, 3 Nov 2025 18:15:35 +0000 Subject: [PATCH 37/86] Dry out JVectorFloatVectorValues --- .../codecs/jvector/JVectorFloatVectorValues.java | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java index df9b71a385b8..d3b2007c1427 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java @@ -34,8 +34,6 @@ public class JVectorFloatVectorValues extends FloatVectorValues { private final OnDiskGraphIndex.View view; private final VectorSimilarityFunction similarityFunction; - private final int dimension; - private final int size; private final GraphNodeIdToDocMap graphNodeIdToDocMap; public JVectorFloatVectorValues( @@ -44,20 +42,18 @@ public JVectorFloatVectorValues( GraphNodeIdToDocMap graphNodeIdToDocMap) throws IOException { this.view = onDiskGraphIndex.getView(); - this.dimension = view.dimension(); - this.size = view.size(); this.similarityFunction = similarityFunction; this.graphNodeIdToDocMap = graphNodeIdToDocMap; } @Override public int dimension() { - return dimension; + return view.dimension(); } @Override public int size() { - return size; + return view.size(); } // This allows us to access the vector without copying it to float[] @@ -90,7 +86,7 @@ public int docID() { public int nextDoc() throws IOException { // Advance to the next node docId starts from -1 which is why we need to increment docId by // 1 "size" times - while (docId < size - 1) { + while (docId < size() - 1) { docId++; if (liveNodes.get(docId)) { return docId; @@ -118,10 +114,6 @@ public float[] vectorValue(int i) throws IOException { } } - public VectorFloat vectorValueObject(int i) throws IOException { - return vectorFloatValue(i); - } - @Override public FloatVectorValues copy() throws IOException { return this; From 10a4287330d0ea3958847b82d80eaef48df54a80 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Mon, 3 Nov 2025 19:15:05 +0000 Subject: [PATCH 38/86] Fix JVectorFloatVectorValues VectorScorers --- .../jvector/JVectorFloatVectorValues.java | 42 ++++++++++++++- .../sandbox/codecs/jvector/JVectorReader.java | 5 +- .../codecs/jvector/JVectorVectorScorer.java | 54 ------------------- 3 files changed, 44 insertions(+), 57 deletions(-) delete mode 100644 lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java index d3b2007c1427..4e8376966613 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java +++ 
b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java @@ -18,6 +18,8 @@ package org.apache.lucene.sandbox.codecs.jvector; import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex; +import io.github.jbellis.jvector.graph.similarity.ScoreFunction; +import io.github.jbellis.jvector.quantization.PQVectors; import io.github.jbellis.jvector.util.Bits; import io.github.jbellis.jvector.vector.VectorSimilarityFunction; import io.github.jbellis.jvector.vector.VectorizationProvider; @@ -25,6 +27,7 @@ import io.github.jbellis.jvector.vector.types.VectorTypeSupport; import java.io.IOException; import org.apache.lucene.index.FloatVectorValues; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.search.VectorScorer; /// Implements Lucene vector access over a JVector on-disk index @@ -33,15 +36,18 @@ public class JVectorFloatVectorValues extends FloatVectorValues { VectorizationProvider.getInstance().getVectorTypeSupport(); private final OnDiskGraphIndex.View view; + private final PQVectors pq; private final VectorSimilarityFunction similarityFunction; private final GraphNodeIdToDocMap graphNodeIdToDocMap; public JVectorFloatVectorValues( OnDiskGraphIndex onDiskGraphIndex, + PQVectors pq, VectorSimilarityFunction similarityFunction, GraphNodeIdToDocMap graphNodeIdToDocMap) throws IOException { this.view = onDiskGraphIndex.getView(); + this.pq = pq; this.similarityFunction = similarityFunction; this.graphNodeIdToDocMap = graphNodeIdToDocMap; } @@ -121,7 +127,39 @@ public FloatVectorValues copy() throws IOException { @Override public VectorScorer scorer(float[] query) throws IOException { - return new JVectorVectorScorer( - this, VECTOR_TYPE_SUPPORT.createFloatVector(query), similarityFunction); + if (pq != null) { + final var vector = VECTOR_TYPE_SUPPORT.createFloatVector(query); + final var quantizedScoreFunction = pq.precomputedScoreFunctionFor(vector, similarityFunction); + return new JVectorScorer(quantizedScoreFunction, iterator()); + } else { + return rescorer(query); + } + } + + @Override + public VectorScorer rescorer(float[] target) throws IOException { + final var vector = VECTOR_TYPE_SUPPORT.createFloatVector(target); + final var scoreFunction = view.rerankerFor(vector, similarityFunction); + return new JVectorScorer(scoreFunction, iterator()); + } + + private static class JVectorScorer implements VectorScorer { + private final ScoreFunction scoreFunction; + private final DocIndexIterator iterator; + + JVectorScorer(ScoreFunction scoreFunction, DocIndexIterator iterator) { + this.scoreFunction = scoreFunction; + this.iterator = iterator; + } + + @Override + public float score() throws IOException { + return scoreFunction.similarityTo(iterator.index()); + } + + @Override + public DocIdSetIterator iterator() { + return iterator; + } } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index a7ca23e98132..ed2564130dad 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -130,7 +130,10 @@ public VectorScorer scorer(float[] target) throws IOException { } return new JVectorFloatVectorValues( - fieldEntry.index, fieldEntry.similarityFunction, fieldEntry.graphNodeIdToDocMap); + fieldEntry.index, + fieldEntry.pqVectors, + fieldEntry.similarityFunction, + 
fieldEntry.graphNodeIdToDocMap); } @Override diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java deleted file mode 100644 index 8c9006dd0901..000000000000 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorVectorScorer.java +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.lucene.sandbox.codecs.jvector; - -import io.github.jbellis.jvector.vector.VectorSimilarityFunction; -import io.github.jbellis.jvector.vector.types.VectorFloat; -import java.io.IOException; -import org.apache.lucene.index.KnnVectorValues; -import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.search.VectorScorer; - -/// Implements Lucene scoring over a JVector index -public class JVectorVectorScorer implements VectorScorer { - private final JVectorFloatVectorValues floatVectorValues; - private final KnnVectorValues.DocIndexIterator docIndexIterator; - private final VectorFloat target; - private final VectorSimilarityFunction similarityFunction; - - public JVectorVectorScorer( - JVectorFloatVectorValues vectorValues, - VectorFloat target, - VectorSimilarityFunction similarityFunction) { - this.floatVectorValues = vectorValues; - this.docIndexIterator = floatVectorValues.iterator(); - this.target = target; - this.similarityFunction = similarityFunction; - } - - @Override - public float score() throws IOException { - return similarityFunction.compare( - target, floatVectorValues.vectorFloatValue(docIndexIterator.index())); - } - - @Override - public DocIdSetIterator iterator() { - return docIndexIterator; - } -} From 65439575cf2a5e164149db4a8250cf033f2a8bd2 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Mon, 3 Nov 2025 19:17:44 +0000 Subject: [PATCH 39/86] Fix merging empty graphs --- .../apache/lucene/sandbox/codecs/jvector/JVectorWriter.java | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 3f73810c9469..6e3499c5d26c 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -690,7 +690,13 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge int graphNodeId = 0; // Build a new graph from scratch and compact the graph node ids for (int readerIdx = 0; readerIdx < readers.length; readerIdx++) { + if (readers[readerIdx] == null) { + continue; + } final FloatVectorValues values = 
readers[readerIdx].getFloatVectorValues(fieldName); + if (values == null || values.size() == 0) { + continue; + } perReaderFloatVectorValues[readerIdx] = values; // For each vector in this reader KnnVectorValues.DocIndexIterator it = values.iterator(); From 5f069189504d5a320b20aab0b81fe71bfc1fe892 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Mon, 3 Nov 2025 20:41:24 +0000 Subject: [PATCH 40/86] Pull dimension from merged fieldInfo --- .../apache/lucene/sandbox/codecs/jvector/JVectorWriter.java | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 6e3499c5d26c..aee3767c647d 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -610,6 +610,7 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge throws IOException { this.totalDocsCount = Math.toIntExact(Arrays.stream(mergeState.maxDocs).asLongStream().sum()); this.fieldInfo = fieldInfo; + this.dimension = fieldInfo.getVectorDimension(); final String fieldName = fieldInfo.name; @@ -618,7 +619,6 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge // between global ordinals and global lucene doc ids int totalVectorsCount = 0; int totalLiveVectorsCount = 0; - int dimension = 0; int pqReaderIndex = -1; ProductQuantization pq = null; int vectorsCountInLeadingReader = -1; @@ -656,7 +656,7 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge } totalVectorsCount += vectorCountInReader; totalLiveVectorsCount += liveVectorCountInReader; - dimension = Math.max(dimension, values.dimension()); + assert values.dimension() == dimension; } } } @@ -672,7 +672,6 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge this.pqReaderIndex = pqReaderIndex; this.size = totalVectorsCount; this.perReaderFloatVectorValues = new FloatVectorValues[readers.length]; - this.dimension = dimension; // Build mapping from global ordinal to [readerIndex, readerOrd] this.ravvOrdToReaderMapping = new int[totalDocsCount][2]; From 7cc847993bf51020a07a5b10ac5160feaa265da8 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Mon, 3 Nov 2025 20:53:33 +0000 Subject: [PATCH 41/86] Fix imports --- .../sandbox/codecs/jvector/JVectorWriter.java | 25 +++++++++++++++---- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index aee3767c647d..4c4bdca64a59 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -22,8 +22,11 @@ import static org.apache.lucene.sandbox.codecs.jvector.JVectorFormat.SIMD_POOL_FLUSH; import static org.apache.lucene.sandbox.codecs.jvector.JVectorFormat.SIMD_POOL_MERGE; -import io.github.jbellis.jvector.graph.*; -import io.github.jbellis.jvector.graph.disk.*; +import io.github.jbellis.jvector.graph.GraphIndexBuilder; +import io.github.jbellis.jvector.graph.ListRandomAccessVectorValues; +import io.github.jbellis.jvector.graph.OnHeapGraphIndex; +import io.github.jbellis.jvector.graph.RandomAccessVectorValues; +import 
io.github.jbellis.jvector.graph.disk.OnDiskSequentialGraphIndexWriter; import io.github.jbellis.jvector.graph.disk.feature.Feature; import io.github.jbellis.jvector.graph.disk.feature.FeatureId; import io.github.jbellis.jvector.graph.disk.feature.InlineVectors; @@ -36,7 +39,9 @@ import java.io.IOException; import java.io.UncheckedIOException; import java.io.UnsupportedEncodingException; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; import java.util.concurrent.ForkJoinPool; import java.util.function.IntUnaryOperator; import java.util.stream.IntStream; @@ -44,9 +49,19 @@ import org.apache.lucene.codecs.KnnFieldVectorsWriter; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.KnnVectorsWriter; -import org.apache.lucene.index.*; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.FloatVectorValues; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.KnnVectorValues; +import org.apache.lucene.index.MergeState; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.Sorter; +import org.apache.lucene.index.VectorEncoding; +import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.search.DocIdSetIterator; -import org.apache.lucene.store.*; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.Bits; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.RamUsageEstimator; From 9f4a55717cb50cc593f55c192d1d888494942198 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Mon, 3 Nov 2025 22:15:57 +0000 Subject: [PATCH 42/86] Use RemappedRandomAccessVectorValues to abstract graphNodeIdsToRavvOrds --- .../sandbox/codecs/jvector/JVectorWriter.java | 71 +++++-------------- 1 file changed, 18 insertions(+), 53 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 4c4bdca64a59..eeb36843adb4 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -26,6 +26,7 @@ import io.github.jbellis.jvector.graph.ListRandomAccessVectorValues; import io.github.jbellis.jvector.graph.OnHeapGraphIndex; import io.github.jbellis.jvector.graph.RandomAccessVectorValues; +import io.github.jbellis.jvector.graph.RemappedRandomAccessVectorValues; import io.github.jbellis.jvector.graph.disk.OnDiskSequentialGraphIndexWriter; import io.github.jbellis.jvector.graph.disk.feature.Feature; import io.github.jbellis.jvector.graph.disk.feature.FeatureId; @@ -218,15 +219,11 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { for (FieldWriter field : fields) { final RandomAccessVectorValues randomAccessVectorValues = field.randomAccessVectorValues; - final int[] newToOldOrds = new int[randomAccessVectorValues.size()]; - for (int ord = 0; ord < randomAccessVectorValues.size(); ord++) { - newToOldOrds[ord] = ord; - } final BuildScoreProvider buildScoreProvider; final PQVectors pqVectors; final FieldInfo fieldInfo = field.fieldInfo; if (randomAccessVectorValues.size() >= minimumBatchSizeForQuantization) { - pqVectors = getPQVectors(newToOldOrds, 
randomAccessVectorValues, fieldInfo); + pqVectors = getPQVectors(randomAccessVectorValues, fieldInfo); buildScoreProvider = BuildScoreProvider.pqBuildScoreProvider( getVectorSimilarityFunction(fieldInfo), pqVectors); @@ -252,17 +249,11 @@ public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { getGraph( buildScoreProvider, randomAccessVectorValues, - newToOldOrds, fieldInfo, segmentWriteState.segmentInfo.name, SIMD_POOL_FLUSH); writeField( - field.fieldInfo, - field.randomAccessVectorValues, - pqVectors, - newToOldOrds, - graphNodeIdToDocMap, - graph); + field.fieldInfo, field.randomAccessVectorValues, pqVectors, graphNodeIdToDocMap, graph); } } @@ -270,18 +261,11 @@ private void writeField( FieldInfo fieldInfo, RandomAccessVectorValues randomAccessVectorValues, PQVectors pqVectors, - int[] newToOldOrds, GraphNodeIdToDocMap graphNodeIdToDocMap, OnHeapGraphIndex graph) throws IOException { final var vectorIndexFieldMetadata = - writeGraph( - graph, - randomAccessVectorValues, - fieldInfo, - pqVectors, - newToOldOrds, - graphNodeIdToDocMap); + writeGraph(graph, randomAccessVectorValues, fieldInfo, pqVectors, graphNodeIdToDocMap); meta.writeInt(fieldInfo.number); vectorIndexFieldMetadata.toOutput(meta); } @@ -300,7 +284,6 @@ private VectorIndexFieldMetadata writeGraph( RandomAccessVectorValues randomAccessVectorValues, FieldInfo fieldInfo, PQVectors pqVectors, - int[] newToOldOrds, GraphNodeIdToDocMap graphNodeIdToDocMap) throws IOException { // field data file, which contains the graph @@ -340,9 +323,7 @@ private VectorIndexFieldMetadata writeGraph( var suppliers = Feature.singleStateFactory( FeatureId.INLINE_VECTORS, - nodeId -> - new InlineVectors.State( - randomAccessVectorValues.getVector(newToOldOrds[nodeId]))); + nodeId -> new InlineVectors.State(randomAccessVectorValues.getVector(nodeId))); writer.write(suppliers); final long endGraphOffset = jVectorIndexWriter.position(); @@ -377,8 +358,7 @@ private VectorIndexFieldMetadata writeGraph( } private PQVectors getPQVectors( - int[] newToOldOrds, RandomAccessVectorValues randomAccessVectorValues, FieldInfo fieldInfo) - throws IOException { + RandomAccessVectorValues randomAccessVectorValues, FieldInfo fieldInfo) throws IOException { final VectorSimilarityFunction vectorSimilarityFunction = fieldInfo.getVectorSimilarityFunction(); final int M = @@ -396,11 +376,7 @@ private PQVectors getPQVectors( SIMD_POOL_MERGE, ForkJoinPool.commonPool()); - // PQVectors pqVectors = pq.encodeAll(randomAccessVectorValues, SIMD_POOL); - PQVectors pqVectors = - PQVectors.encodeAndBuild( - pq, newToOldOrds.length, newToOldOrds, randomAccessVectorValues, SIMD_POOL_MERGE); - return pqVectors; + return pq.encodeAll(randomAccessVectorValues, SIMD_POOL_MERGE); } /// Metadata about the index to be persisted on disk @@ -773,6 +749,8 @@ public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState merge * @throws IOException if there is an issue during reading or writing vector data. 
*/ public void merge() throws IOException { + final RandomAccessVectorValues mapped = + new RemappedRandomAccessVectorValues(this, graphNodeIdsToRavvOrds); // This section creates the PQVectors to be used for this merge // Get PQ compressor for leading reader final String fieldName = fieldInfo.name; @@ -792,16 +770,10 @@ public void merge() throws IOException { new RandomAccessVectorValuesOverVectorValues(values); newPq = newPq.refine(randomAccessVectorValues); } - pqVectors = - PQVectors.encodeAndBuild( - newPq, - graphNodeIdsToRavvOrds.length, - graphNodeIdsToRavvOrds, - this, - SIMD_POOL_MERGE); - } else if (this.size() >= minimumBatchSizeForQuantization) { + pqVectors = newPq.encodeAll(mapped, SIMD_POOL_MERGE); + } else if (mapped.size() >= minimumBatchSizeForQuantization) { // No pre-existing codebooks, check if we have enough vectors to trigger quantization - pqVectors = getPQVectors(graphNodeIdsToRavvOrds, this, fieldInfo); + pqVectors = getPQVectors(mapped, fieldInfo); } else { pqVectors = null; } @@ -816,20 +788,17 @@ public void merge() throws IOException { // threads) buildScoreProvider.diversityProviderFor(0); } else { - buildScoreProvider = - BuildScoreProvider.randomAccessScoreProvider( - this, graphNodeIdsToRavvOrds, getVectorSimilarityFunction(fieldInfo)); + buildScoreProvider = BuildScoreProvider.randomAccessScoreProvider(mapped, getVectorSimilarityFunction(fieldInfo)); } final OnHeapGraphIndex graph = getGraph( buildScoreProvider, - this, - graphNodeIdsToRavvOrds, + mapped, fieldInfo, segmentWriteState.segmentInfo.name, SIMD_POOL_MERGE); - writeField(fieldInfo, this, pqVectors, graphNodeIdsToRavvOrds, graphNodeIdToDocMap, graph); + writeField(fieldInfo, mapped, pqVectors, graphNodeIdToDocMap, graph); } @Override @@ -884,7 +853,6 @@ public RandomAccessVectorValues copy() { public OnHeapGraphIndex getGraph( BuildScoreProvider buildScoreProvider, RandomAccessVectorValues randomAccessVectorValues, - int[] newToOldOrds, FieldInfo fieldInfo, String segmentName, ForkJoinPool SIMD_POOL) { @@ -908,16 +876,13 @@ public OnHeapGraphIndex getGraph( var vv = randomAccessVectorValues.threadLocalSupplier(); // parallel graph construction from the merge documents Ids + final int size = randomAccessVectorValues.size(); SIMD_POOL .submit( () -> - IntStream.range(0, newToOldOrds.length) + IntStream.range(0, size) .parallel() - .forEach( - ord -> { - graphIndexBuilder.addGraphNode( - ord, vv.get().getVector(newToOldOrds[ord])); - })) + .forEach(ord -> graphIndexBuilder.addGraphNode(ord, vv.get().getVector(ord)))) .join(); graphIndexBuilder.cleanup(); graphIndex = (OnHeapGraphIndex) graphIndexBuilder.getGraph(); From a4d32c435642c093a976c6bcc39e0c867297445a Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Tue, 4 Nov 2025 17:52:14 +0000 Subject: [PATCH 43/86] Fix GraphNodeIdToDocMap deserialization --- .../lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java | 1 + 1 file changed, 1 insertion(+) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java index 0bd8febec442..c526bbafe152 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java @@ -53,6 +53,7 @@ public GraphNodeIdToDocMap(IndexInput in) throws IOException { graphNodeIdsToDocIds = new int[size]; docIdsToGraphNodeIds = new int[maxDocId]; + 
Arrays.fill(docIdsToGraphNodeIds, -1); for (int ord = 0; ord < size; ord++) { final int docId = in.readVInt(); graphNodeIdsToDocIds[ord] = docId; From 765cfaefb6f47465587b70300254736e6c2d95e5 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Tue, 4 Nov 2025 18:11:25 +0000 Subject: [PATCH 44/86] Remove type parameter from FieldWriter --- .../sandbox/codecs/jvector/JVectorWriter.java | 31 ++++++------------- 1 file changed, 9 insertions(+), 22 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index eeb36843adb4..e521de84c81b 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -105,7 +105,7 @@ public class JVectorWriter extends KnnVectorsWriter { private static final long SHALLOW_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(JVectorWriter.class); - private final List> fields = new ArrayList<>(); + private final List fields = new ArrayList<>(); private final IndexOutput meta; private final IndexOutput vectorIndex; @@ -193,7 +193,7 @@ public KnnFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOException + "This can provides much greater savings in storage and memory"; throw new UnsupportedOperationException(errorMessage); } - FieldWriter newField = new FieldWriter<>(fieldInfo); + FieldWriter newField = new FieldWriter(fieldInfo); fields.add(newField); return newField; @@ -217,7 +217,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE @Override public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { - for (FieldWriter field : fields) { + for (FieldWriter field : fields) { final RandomAccessVectorValues randomAccessVectorValues = field.randomAccessVectorValues; final BuildScoreProvider buildScoreProvider; final PQVectors pqVectors; @@ -469,7 +469,7 @@ public void close() throws IOException { @Override public long ramBytesUsed() { long total = SHALLOW_RAM_BYTES_USED; - for (FieldWriter field : fields) { + for (FieldWriter field : fields) { // the field tracks the delegate field usage total += field.ramBytesUsed(); } @@ -480,11 +480,8 @@ public long ramBytesUsed() { * The FieldWriter class is responsible for writing vector field data into index segments. It * provides functionality to process vector values as those being added, manage memory usage, and * build HNSW graph indexing structures for efficient retrieval during search queries. - * - * @param The type of vector value to be handled by the writer. This is often specialized to - * support specific implementations, such as float[] or byte[] vectors. 
*/ - static class FieldWriter extends KnnFieldVectorsWriter { + static class FieldWriter extends KnnFieldVectorsWriter { private final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(FieldWriter.class); private final FieldInfo fieldInfo; private int lastDocID = -1; @@ -502,7 +499,7 @@ static class FieldWriter extends KnnFieldVectorsWriter { } @Override - public void addValue(int docID, T vectorValue) throws IOException { + public void addValue(int docID, float[] vectorValue) throws IOException { if (docID == lastDocID) { throw new IllegalArgumentException( "VectorValuesField \"" @@ -510,24 +507,14 @@ public void addValue(int docID, T vectorValue) throws IOException { + "\" appears more than once in this document (only one value is allowed per field)"); } docIds.add(docID); - if (vectorValue instanceof float[]) { - vectors.add(VECTOR_TYPE_SUPPORT.createFloatVector(vectorValue)); - } else if (vectorValue instanceof byte[]) { - final String errorMessage = - "byte[] vectors are not supported in JVector. " - + "Instead you should only use float vectors and leverage product quantization during indexing." - + "This can provides much greater savings in storage and memory"; - throw new UnsupportedOperationException(errorMessage); - } else { - throw new IllegalArgumentException("Unsupported vector type: " + vectorValue.getClass()); - } + vectors.add(VECTOR_TYPE_SUPPORT.createFloatVector(vectorValue)); lastDocID = docID; } @Override - public T copyValue(T vectorValue) { - throw new UnsupportedOperationException("copyValue not supported"); + public float[] copyValue(float[] vectorValue) { + return vectorValue.clone(); } @Override From 19c9c3c0125645893c1d77fcb6040de6f3e971b4 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Tue, 4 Nov 2025 18:14:13 +0000 Subject: [PATCH 45/86] Fix missing copy on FieldWriter.addValue --- .../org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index e521de84c81b..c5bc0407e73a 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -507,7 +507,7 @@ public void addValue(int docID, float[] vectorValue) throws IOException { + "\" appears more than once in this document (only one value is allowed per field)"); } docIds.add(docID); - vectors.add(VECTOR_TYPE_SUPPORT.createFloatVector(vectorValue)); + vectors.add(VECTOR_TYPE_SUPPORT.createFloatVector(copyValue(vectorValue))); lastDocID = docID; } From 8a9d5ab58113b1a6ef29bfa15b90ce54fa0e1c3e Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Tue, 4 Nov 2025 18:17:48 +0000 Subject: [PATCH 46/86] Move DocIndexIterator logic to GraphNodeIdToDocMap --- .../codecs/jvector/GraphNodeIdToDocMap.java | 46 +++++++++++++++++++ .../jvector/JVectorFloatVectorValues.java | 41 +---------------- 2 files changed, 47 insertions(+), 40 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java index c526bbafe152..8c1b1339da48 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java +++ 
b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java @@ -17,9 +17,11 @@ package org.apache.lucene.sandbox.codecs.jvector; +import io.github.jbellis.jvector.util.Bits; import java.io.IOException; import java.util.Arrays; import org.apache.lucene.index.Sorter; +import org.apache.lucene.index.KnnVectorValues.DocIndexIterator; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -165,4 +167,48 @@ public void toOutput(IndexOutput out) throws IOException { out.writeVInt(graphNodeIdsToDocIds[ord]); } } + + public DocIndexIterator iterator(Bits liveOrds) { + return new DocIndexIterator() { + int docId = -1; + @Override + public int index() { + return docIdsToGraphNodeIds[docId]; + } + + @Override + public int docID() { + return docId; + } + + @Override + public int nextDoc() throws IOException { + while (docId < docIdsToGraphNodeIds.length - 1) { + ++docId; + final int ord = docIdsToGraphNodeIds[docId]; + if (ord >= 0 && liveOrds.get(ord)) { + return docId; + } + } + return docId = NO_MORE_DOCS; + } + + @Override + public int advance(int target) throws IOException { + if (target <= docId) { + throw new IllegalArgumentException(); + } else if (target >= docIdsToGraphNodeIds.length) { + return docId = NO_MORE_DOCS; + } + + docId = target - 1; + return nextDoc(); + } + + @Override + public long cost() { + return graphNodeIdsToDocIds.length; + } + }; + } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java index 4e8376966613..14a982dc8ab3 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java @@ -20,7 +20,6 @@ import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex; import io.github.jbellis.jvector.graph.similarity.ScoreFunction; import io.github.jbellis.jvector.quantization.PQVectors; -import io.github.jbellis.jvector.util.Bits; import io.github.jbellis.jvector.vector.VectorSimilarityFunction; import io.github.jbellis.jvector.vector.VectorizationProvider; import io.github.jbellis.jvector.vector.types.VectorFloat; @@ -69,45 +68,7 @@ public VectorFloat vectorFloatValue(int ord) { @Override public DocIndexIterator iterator() { - return new DocIndexIterator() { - private int docId = -1; - private final Bits liveNodes = view.liveNodes(); - - @Override - public long cost() { - return size(); - } - - @Override - public int index() { - return graphNodeIdToDocMap.getJVectorNodeId(docId); - } - - @Override - public int docID() { - return docId; - } - - @Override - public int nextDoc() throws IOException { - // Advance to the next node docId starts from -1 which is why we need to increment docId by - // 1 "size" times - while (docId < size() - 1) { - docId++; - if (liveNodes.get(docId)) { - return docId; - } - } - docId = NO_MORE_DOCS; - - return docId; - } - - @Override - public int advance(int target) throws IOException { - return slowAdvance(target); - } - }; + return graphNodeIdToDocMap.iterator(view.liveNodes()); } @Override From 64aefe5f013a78462fbad2ffe56342d3b6b69bae Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 5 Nov 2025 20:45:31 +0000 Subject: [PATCH 47/86] Replace OnDiskGraphIndex.View.liveBits() check with assertion --- .../lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java | 5 ++--- 
.../sandbox/codecs/jvector/JVectorFloatVectorValues.java | 4 +++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java index 8c1b1339da48..f6ba30784994 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java @@ -17,7 +17,6 @@ package org.apache.lucene.sandbox.codecs.jvector; -import io.github.jbellis.jvector.util.Bits; import java.io.IOException; import java.util.Arrays; import org.apache.lucene.index.Sorter; @@ -168,7 +167,7 @@ public void toOutput(IndexOutput out) throws IOException { } } - public DocIndexIterator iterator(Bits liveOrds) { + public DocIndexIterator iterator() { return new DocIndexIterator() { int docId = -1; @Override @@ -186,7 +185,7 @@ public int nextDoc() throws IOException { while (docId < docIdsToGraphNodeIds.length - 1) { ++docId; final int ord = docIdsToGraphNodeIds[docId]; - if (ord >= 0 && liveOrds.get(ord)) { + if (ord >= 0) { return docId; } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java index 14a982dc8ab3..09047727baf1 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java @@ -20,6 +20,7 @@ import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex; import io.github.jbellis.jvector.graph.similarity.ScoreFunction; import io.github.jbellis.jvector.quantization.PQVectors; +import io.github.jbellis.jvector.util.Bits.MatchAllBits; import io.github.jbellis.jvector.vector.VectorSimilarityFunction; import io.github.jbellis.jvector.vector.VectorizationProvider; import io.github.jbellis.jvector.vector.types.VectorFloat; @@ -68,7 +69,8 @@ public VectorFloat vectorFloatValue(int ord) { @Override public DocIndexIterator iterator() { - return graphNodeIdToDocMap.iterator(view.liveNodes()); + assert view.liveNodes() instanceof MatchAllBits : "All OnDiskGraphIndex nodes must be live"; + return graphNodeIdToDocMap.iterator(); } @Override From 6defb82c41fb04c62a050cfcf79a1f0fc337b50b Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Tue, 4 Nov 2025 18:37:18 +0000 Subject: [PATCH 48/86] Extract FieldWriter.randomAccessVectorValues to method --- .../lucene/sandbox/codecs/jvector/JVectorWriter.java | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index c5bc0407e73a..bf9458b91520 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -218,7 +218,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE @Override public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { for (FieldWriter field : fields) { - final RandomAccessVectorValues randomAccessVectorValues = field.randomAccessVectorValues; + final RandomAccessVectorValues randomAccessVectorValues = 
field.toRandomAccessVectorValues(); final BuildScoreProvider buildScoreProvider; final PQVectors pqVectors; final FieldInfo fieldInfo = field.fieldInfo; @@ -252,8 +252,7 @@ public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { fieldInfo, segmentWriteState.segmentInfo.name, SIMD_POOL_FLUSH); - writeField( - field.fieldInfo, field.randomAccessVectorValues, pqVectors, graphNodeIdToDocMap, graph); + writeField(field.fieldInfo, randomAccessVectorValues, pqVectors, graphNodeIdToDocMap, graph); } } @@ -485,7 +484,6 @@ static class FieldWriter extends KnnFieldVectorsWriter { private final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(FieldWriter.class); private final FieldInfo fieldInfo; private int lastDocID = -1; - private final RandomAccessVectorValues randomAccessVectorValues; // The ordering of docIds matches the ordering of vectors, the index in this list corresponds to // the jVector ordinal private final List> vectors = new ArrayList<>(); @@ -493,8 +491,6 @@ static class FieldWriter extends KnnFieldVectorsWriter { FieldWriter(FieldInfo fieldInfo) { /** For creating a new field from a flat field vectors writer. */ - this.randomAccessVectorValues = - new ListRandomAccessVectorValues(vectors, fieldInfo.getVectorDimension()); this.fieldInfo = fieldInfo; } @@ -517,6 +513,10 @@ public float[] copyValue(float[] vectorValue) { return vectorValue.clone(); } + public RandomAccessVectorValues toRandomAccessVectorValues() { + return new ListRandomAccessVectorValues(vectors, fieldInfo.getVectorDimension()); + } + @Override public long ramBytesUsed() { return SHALLOW_SIZE + (long) vectors.size() * fieldInfo.getVectorDimension() * Float.BYTES; From ae5cb1619d93af7f5b146ee73dea538db41cc1f8 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Tue, 4 Nov 2025 18:48:40 +0000 Subject: [PATCH 49/86] Use DocsWithFieldSet instead of List in FieldWriter --- .../sandbox/codecs/jvector/JVectorWriter.java | 20 +++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index bf9458b91520..58211b49c2ca 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -21,6 +21,7 @@ import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding; import static org.apache.lucene.sandbox.codecs.jvector.JVectorFormat.SIMD_POOL_FLUSH; import static org.apache.lucene.sandbox.codecs.jvector.JVectorFormat.SIMD_POOL_MERGE; +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; import io.github.jbellis.jvector.graph.GraphIndexBuilder; import io.github.jbellis.jvector.graph.ListRandomAccessVectorValues; @@ -50,6 +51,7 @@ import org.apache.lucene.codecs.KnnFieldVectorsWriter; import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.KnnVectorsWriter; +import org.apache.lucene.index.DocsWithFieldSet; import org.apache.lucene.index.FieldInfo; import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FloatVectorValues; @@ -237,8 +239,10 @@ public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { // Generate the ord to doc mapping final int[] ordinalsToDocIds = new int[randomAccessVectorValues.size()]; - for (int ord = 0; ord < randomAccessVectorValues.size(); ord++) { - 
ordinalsToDocIds[ord] = field.docIds.get(ord); + int ord = 0; + final var docIter = field.docIds.iterator(); + for (int docId = docIter.nextDoc(); docId != NO_MORE_DOCS; docId = docIter.nextDoc()) { + ordinalsToDocIds[ord++] = docId; } final GraphNodeIdToDocMap graphNodeIdToDocMap = new GraphNodeIdToDocMap(ordinalsToDocIds); if (sortMap != null) { @@ -483,20 +487,20 @@ public long ramBytesUsed() { static class FieldWriter extends KnnFieldVectorsWriter { private final long SHALLOW_SIZE = RamUsageEstimator.shallowSizeOfInstance(FieldWriter.class); private final FieldInfo fieldInfo; - private int lastDocID = -1; // The ordering of docIds matches the ordering of vectors, the index in this list corresponds to // the jVector ordinal private final List> vectors = new ArrayList<>(); - private final List docIds = new ArrayList<>(); + private DocsWithFieldSet docIds; FieldWriter(FieldInfo fieldInfo) { /** For creating a new field from a flat field vectors writer. */ this.fieldInfo = fieldInfo; + this.docIds = new DocsWithFieldSet(); } @Override public void addValue(int docID, float[] vectorValue) throws IOException { - if (docID == lastDocID) { + if (docID < docIds.cardinality()) { throw new IllegalArgumentException( "VectorValuesField \"" + fieldInfo.name @@ -504,8 +508,6 @@ public void addValue(int docID, float[] vectorValue) throws IOException { } docIds.add(docID); vectors.add(VECTOR_TYPE_SUPPORT.createFloatVector(copyValue(vectorValue))); - - lastDocID = docID; } @Override @@ -519,7 +521,9 @@ public RandomAccessVectorValues toRandomAccessVectorValues() { @Override public long ramBytesUsed() { - return SHALLOW_SIZE + (long) vectors.size() * fieldInfo.getVectorDimension() * Float.BYTES; + return SHALLOW_SIZE + + (long) vectors.size() * fieldInfo.getVectorDimension() * Float.BYTES + + docIds.ramBytesUsed(); } } From 19817295e94e19e4d8aceaf87661c2939e03d075 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Tue, 4 Nov 2025 21:28:18 +0000 Subject: [PATCH 50/86] Add GraphNodeIdToDocMap constructor from DocsWithFieldSet --- .../codecs/jvector/GraphNodeIdToDocMap.java | 32 +++++++++++++++++++ .../sandbox/codecs/jvector/JVectorWriter.java | 14 +++----- 2 files changed, 37 insertions(+), 9 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java index f6ba30784994..29592c2198b9 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java @@ -17,9 +17,13 @@ package org.apache.lucene.sandbox.codecs.jvector; +import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; + import java.io.IOException; +import java.io.UncheckedIOException; import java.util.Arrays; import org.apache.lucene.index.Sorter; +import org.apache.lucene.index.DocsWithFieldSet; import org.apache.lucene.index.KnnVectorValues.DocIndexIterator; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; @@ -97,6 +101,33 @@ public GraphNodeIdToDocMap(int[] graphNodeIdsToDocIds) { } } + public GraphNodeIdToDocMap(DocsWithFieldSet docs) { + this.graphNodeIdsToDocIds = new int[docs.cardinality()]; + + int ord = 0; + int maxDocId = -1; + final var docsIterator = docs.iterator(); + try { + for (int docId = docsIterator.nextDoc(); + docId != NO_MORE_DOCS; + docId = docsIterator.nextDoc()) { + graphNodeIdsToDocIds[ord++] = docId; 
+ if (docId > maxDocId) { + maxDocId = docId; + } + } + } catch (IOException e) { + // This should never happen; docsIterator should be FixedBitSet or DocSetIterator.all() + throw new UncheckedIOException(e); + } + + this.docIdsToGraphNodeIds = new int[maxDocId + 1]; + Arrays.fill(docIdsToGraphNodeIds, -1); + for (ord = 0; ord < graphNodeIdsToDocIds.length; ++ord) { + docIdsToGraphNodeIds[graphNodeIdsToDocIds[ord]] = ord; + } + } + /** * Updates the mapping from the Lucene document IDs to the jVector ordinals based on the sort * operation. (during flush) @@ -170,6 +201,7 @@ public void toOutput(IndexOutput out) throws IOException { public DocIndexIterator iterator() { return new DocIndexIterator() { int docId = -1; + @Override public int index() { return docIdsToGraphNodeIds[docId]; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 58211b49c2ca..e4d320034551 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -21,7 +21,6 @@ import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding; import static org.apache.lucene.sandbox.codecs.jvector.JVectorFormat.SIMD_POOL_FLUSH; import static org.apache.lucene.sandbox.codecs.jvector.JVectorFormat.SIMD_POOL_MERGE; -import static org.apache.lucene.search.DocIdSetIterator.NO_MORE_DOCS; import io.github.jbellis.jvector.graph.GraphIndexBuilder; import io.github.jbellis.jvector.graph.ListRandomAccessVectorValues; @@ -237,14 +236,7 @@ public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { randomAccessVectorValues, getVectorSimilarityFunction(fieldInfo)); } - // Generate the ord to doc mapping - final int[] ordinalsToDocIds = new int[randomAccessVectorValues.size()]; - int ord = 0; - final var docIter = field.docIds.iterator(); - for (int docId = docIter.nextDoc(); docId != NO_MORE_DOCS; docId = docIter.nextDoc()) { - ordinalsToDocIds[ord++] = docId; - } - final GraphNodeIdToDocMap graphNodeIdToDocMap = new GraphNodeIdToDocMap(ordinalsToDocIds); + final GraphNodeIdToDocMap graphNodeIdToDocMap = field.createGraphNodeIdToDocMap(); if (sortMap != null) { graphNodeIdToDocMap.update(sortMap); } @@ -519,6 +511,10 @@ public RandomAccessVectorValues toRandomAccessVectorValues() { return new ListRandomAccessVectorValues(vectors, fieldInfo.getVectorDimension()); } + public GraphNodeIdToDocMap createGraphNodeIdToDocMap() { + return new GraphNodeIdToDocMap(docIds); + } + @Override public long ramBytesUsed() { return SHALLOW_SIZE From 3171addcd27b6161db8fa704c9af58069d7931fd Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Tue, 4 Nov 2025 21:30:22 +0000 Subject: [PATCH 51/86] Fix sort sort-on-flush logic --- .../codecs/jvector/GraphNodeIdToDocMap.java | 38 +------------------ .../sandbox/codecs/jvector/JVectorWriter.java | 36 +++++++++++++++--- 2 files changed, 33 insertions(+), 41 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java index 29592c2198b9..d0324f342268 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java @@ -22,7 +22,6 @@ import 
java.io.IOException; import java.io.UncheckedIOException; import java.util.Arrays; -import org.apache.lucene.index.Sorter; import org.apache.lucene.index.DocsWithFieldSet; import org.apache.lucene.index.KnnVectorValues.DocIndexIterator; import org.apache.lucene.store.IndexInput; @@ -39,8 +38,8 @@ */ public class GraphNodeIdToDocMap { private static final int VERSION = 1; - private int[] graphNodeIdsToDocIds; - private int[] docIdsToGraphNodeIds; + private final int[] graphNodeIdsToDocIds; + private final int[] docIdsToGraphNodeIds; /** * Constructor that reads the mapping from the index input @@ -128,39 +127,6 @@ public GraphNodeIdToDocMap(DocsWithFieldSet docs) { } } - /** - * Updates the mapping from the Lucene document IDs to the jVector ordinals based on the sort - * operation. (during flush) - * - * @param sortMap The sort map - */ - public void update(Sorter.DocMap sortMap) { - final int[] newGraphNodeIdsToDocIds = new int[graphNodeIdsToDocIds.length]; - final int maxNewDocId = - Arrays.stream(graphNodeIdsToDocIds).map(sortMap::oldToNew).max().getAsInt(); - final int maxDocs = maxNewDocId + 1; - if (maxDocs < graphNodeIdsToDocIds.length) { - throw new IllegalStateException( - "Max docs " - + maxDocs - + " is less than the number of ordinals " - + graphNodeIdsToDocIds.length); - } - final int[] newDocIdsToOrdinals = new int[maxDocs]; - Arrays.fill(newDocIdsToOrdinals, -1); - for (int oldDocId = 0; oldDocId < docIdsToGraphNodeIds.length; oldDocId++) { - if (docIdsToGraphNodeIds[oldDocId] == -1) { - continue; - } - final int newDocId = sortMap.oldToNew(oldDocId); - final int oldOrd = docIdsToGraphNodeIds[oldDocId]; - newDocIdsToOrdinals[newDocId] = oldOrd; - newGraphNodeIdsToDocIds[oldOrd] = newDocId; - } - this.docIdsToGraphNodeIds = newDocIdsToOrdinals; - this.graphNodeIdsToDocIds = newGraphNodeIdsToDocIds; - } - /** * Returns the jVector node id for the given Lucene document ID * diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index e4d320034551..e348cfb5f831 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -42,6 +42,7 @@ import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Arrays; +import java.util.Collections; import java.util.List; import java.util.concurrent.ForkJoinPool; import java.util.function.IntUnaryOperator; @@ -98,7 +99,7 @@ * {@link GraphNodeIdToDocMap} class in the index metadata and allowing us to update the mapping as * needed across merges by constructing a new mapping from the previous mapping and the {@link * org.apache.lucene.index.MergeState.DocMap} provided in the {@link MergeState}. And across sorts - * with {@link GraphNodeIdToDocMap#update(Sorter.DocMap)} during flushes. + * with {@link FieldWriter#applySort(Sorter.DocMap)} during flushes. 
*/ public class JVectorWriter extends KnnVectorsWriter { private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = @@ -219,6 +220,9 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE @Override public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { for (FieldWriter field : fields) { + if (sortMap != null) { + field.applySort(sortMap); + } final RandomAccessVectorValues randomAccessVectorValues = field.toRandomAccessVectorValues(); final BuildScoreProvider buildScoreProvider; final PQVectors pqVectors; @@ -237,10 +241,6 @@ public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { } final GraphNodeIdToDocMap graphNodeIdToDocMap = field.createGraphNodeIdToDocMap(); - if (sortMap != null) { - graphNodeIdToDocMap.update(sortMap); - } - OnHeapGraphIndex graph = getGraph( buildScoreProvider, @@ -507,6 +507,32 @@ public float[] copyValue(float[] vectorValue) { return vectorValue.clone(); } + public void applySort(Sorter.DocMap sortMap) throws IOException { + // Ensure that all existing docs can be sorted + final int[] oldToNewOrd = new int[vectors.size()]; + final DocsWithFieldSet oldDocIds = docIds; + docIds = new DocsWithFieldSet(); + mapOldOrdToNewOrd(oldDocIds, sortMap, oldToNewOrd, null, docIds); + + // Swap vectors into their new ordinals + for (int oldOrd = 0; oldOrd < vectors.size(); ++oldOrd) { + final int newOrd = oldToNewOrd[oldOrd]; + if (oldOrd == newOrd) { + continue; + } + + // Swap the element at oldOrd into its position at newOrd and update the index mapping + Collections.swap(vectors, oldOrd, newOrd); + oldToNewOrd[oldOrd] = oldToNewOrd[newOrd]; + oldToNewOrd[newOrd] = newOrd; + + // The element at oldOrd may be displaced and need to be swapped again + if (oldToNewOrd[oldOrd] != oldOrd) { + oldOrd -= 1; + } + } + } + public RandomAccessVectorValues toRandomAccessVectorValues() { return new ListRandomAccessVectorValues(vectors, fieldInfo.getVectorDimension()); } From 38e0b47afa5c50cb8caded61ae9b85ce96f28256 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Tue, 4 Nov 2025 22:45:26 +0000 Subject: [PATCH 52/86] Fix JVectorFloatVectorValues.ordToDoc --- .../sandbox/codecs/jvector/JVectorFloatVectorValues.java | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java index 09047727baf1..2af052ee2789 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java @@ -62,6 +62,11 @@ public int size() { return view.size(); } + @Override + public int ordToDoc(int ord) { + return graphNodeIdToDocMap.getLuceneDocId(ord); + } + // This allows us to access the vector without copying it to float[] public VectorFloat vectorFloatValue(int ord) { return view.getVector(ord); From b2340dd20b91e4a241cda0c849a988242fcdd55c Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Tue, 4 Nov 2025 22:45:10 +0000 Subject: [PATCH 53/86] Pull out merging functionality into method --- .../codecs/jvector/GraphNodeIdToDocMap.java | 35 -- .../jvector/JVectorFloatVectorValues.java | 4 + .../sandbox/codecs/jvector/JVectorWriter.java | 432 +++++++----------- 3 files changed, 175 insertions(+), 296 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java 
b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java index d0324f342268..0c733c73b34b 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/GraphNodeIdToDocMap.java @@ -65,41 +65,6 @@ public GraphNodeIdToDocMap(IndexInput in) throws IOException { } } - /** - * Constructor that creates a new mapping between ordinals and docIds - * - * @param graphNodeIdsToDocIds The mapping from ordinals to docIds - */ - public GraphNodeIdToDocMap(int[] graphNodeIdsToDocIds) { - if (graphNodeIdsToDocIds.length == 0) { - this.graphNodeIdsToDocIds = new int[0]; - this.docIdsToGraphNodeIds = new int[0]; - return; - } - this.graphNodeIdsToDocIds = new int[graphNodeIdsToDocIds.length]; - System.arraycopy( - graphNodeIdsToDocIds, 0, this.graphNodeIdsToDocIds, 0, graphNodeIdsToDocIds.length); - final int maxDocId = Arrays.stream(graphNodeIdsToDocIds).max().getAsInt(); - final int maxDocs = maxDocId + 1; - // We are going to assume that the number of ordinals is roughly the same as the number of - // documents in the segment, therefore, - // the mapping will not be sparse. - if (maxDocs < graphNodeIdsToDocIds.length) { - throw new IllegalStateException( - "Max docs " - + maxDocs - + " is less than the number of ordinals " - + graphNodeIdsToDocIds.length); - } - // When maxDocId > graphNodeIdsToDocIds.length, there are lots of deleted documents or missing - // values, which wastes memory - this.docIdsToGraphNodeIds = new int[maxDocs]; - Arrays.fill(this.docIdsToGraphNodeIds, -1); // -1 means no mapping to ordinal - for (int ord = 0; ord < graphNodeIdsToDocIds.length; ord++) { - this.docIdsToGraphNodeIds[graphNodeIdsToDocIds[ord]] = ord; - } - } - public GraphNodeIdToDocMap(DocsWithFieldSet docs) { this.graphNodeIdsToDocIds = new int[docs.cardinality()]; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java index 2af052ee2789..0adc733f8dc6 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java @@ -72,6 +72,10 @@ public VectorFloat vectorFloatValue(int ord) { return view.getVector(ord); } + public void getVectorInto(int node, VectorFloat vector, int offset) { + view.getVectorInto(node, vector, offset); + } + @Override public DocIndexIterator iterator() { assert view.liveNodes() instanceof MatchAllBits : "All OnDiskGraphIndex nodes must be live"; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index e348cfb5f831..38670eb23157 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -26,7 +26,6 @@ import io.github.jbellis.jvector.graph.ListRandomAccessVectorValues; import io.github.jbellis.jvector.graph.OnHeapGraphIndex; import io.github.jbellis.jvector.graph.RandomAccessVectorValues; -import io.github.jbellis.jvector.graph.RemappedRandomAccessVectorValues; import io.github.jbellis.jvector.graph.disk.OnDiskSequentialGraphIndexWriter; import io.github.jbellis.jvector.graph.disk.feature.Feature; import 
io.github.jbellis.jvector.graph.disk.feature.FeatureId; @@ -49,11 +48,10 @@ import java.util.stream.IntStream; import org.apache.lucene.codecs.CodecUtil; import org.apache.lucene.codecs.KnnFieldVectorsWriter; -import org.apache.lucene.codecs.KnnVectorsReader; import org.apache.lucene.codecs.KnnVectorsWriter; +import org.apache.lucene.index.DocIDMerger; import org.apache.lucene.index.DocsWithFieldSet; import org.apache.lucene.index.FieldInfo; -import org.apache.lucene.index.FieldInfos; import org.apache.lucene.index.FloatVectorValues; import org.apache.lucene.index.IndexFileNames; import org.apache.lucene.index.KnnVectorValues; @@ -62,10 +60,8 @@ import org.apache.lucene.index.Sorter; import org.apache.lucene.index.VectorEncoding; import org.apache.lucene.index.VectorSimilarityFunction; -import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.Bits; import org.apache.lucene.util.IOUtils; import org.apache.lucene.util.RamUsageEstimator; @@ -208,8 +204,7 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE case BYTE: throw new UnsupportedEncodingException("Byte vectors are not supported in JVector."); case FLOAT32: - final var mergeRavv = new RandomAccessMergedFloatVectorValues(fieldInfo, mergeState); - mergeRavv.merge(); + mergeAndWriteField(fieldInfo, mergeState); break; } } catch (Exception e) { @@ -562,261 +557,167 @@ static io.github.jbellis.jvector.vector.VectorSimilarityFunction getVectorSimila }; } - /** - * Implementation of RandomAccessVectorValues that directly uses the source FloatVectorValues from - * multiple segments without copying the vectors. - * - *
Some details about the implementation logic: - *
First, we identify the leading reader, which is the one with the most live vectors. Second, - * we build a mapping between the ravv ordinals and the reader index and the ordinal in that - * reader. Third, we build a mapping between the ravv ordinals and the global doc ids. - * - *
Very important to note that for the leading graph the node Ids need to correspond to their - * original ravv ordinals in the reader. This is because we are later going to expand that graph - * with new vectors from the other readers. While the new vectors can be assigned arbitrary node - * Ids, the leading graph needs to preserve its original node Ids and map them to the original - * ravv vector ordinals. - */ - class RandomAccessMergedFloatVectorValues implements RandomAccessVectorValues { - private static final int READER_ID = 0; - private static final int READER_ORD = 1; - - // Array of sub-readers - private final KnnVectorsReader[] readers; - private final FloatVectorValues[] perReaderFloatVectorValues; - - // Maps the ravv ordinals to the reader index and the ordinal in that reader. This is allowing - // us to get a unified view of all the - // vectors in all the readers with a single unified ordinal space. - private final int[][] ravvOrdToReaderMapping; - - // Total number of vectors - private final int size; - // Total number of documents including those without values - private final int totalDocsCount; + private void mergeAndWriteField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { + assert fieldInfo.hasVectorValues(); + final int dimension = fieldInfo.getVectorDimension(); + final int mergeCount = mergeState.knnVectorsReaders.length; + + // Collect the sub-readers into a list to make a DocIdMerger + final List subs = new ArrayList<>(mergeCount); + final FloatVectorValues[] vectors = new FloatVectorValues[mergeCount]; + for (int i = 0; i < mergeCount; ++i) { + if (false == MergedVectorValues.hasVectorValues(mergeState.fieldInfos[i], fieldInfo.name)) { + continue; + } + final var reader = mergeState.knnVectorsReaders[i]; + if (reader == null) { + continue; + } + final var values = reader.getFloatVectorValues(fieldInfo.name); + if (values == null || values.size() == 0) { + continue; + } - // Vector dimension - private final int dimension; - private final FieldInfo fieldInfo; - private final GraphNodeIdToDocMap graphNodeIdToDocMap; - private final int[] graphNodeIdsToRavvOrds; - private final int pqReaderIndex; - private final ProductQuantization pq; - - /** - * Creates a random access view over merged float vector values. 
- * - * @param fieldInfo Field info for the vector field - * @param mergeState Merge state containing readers and doc maps - */ - public RandomAccessMergedFloatVectorValues(FieldInfo fieldInfo, MergeState mergeState) - throws IOException { - this.totalDocsCount = Math.toIntExact(Arrays.stream(mergeState.maxDocs).asLongStream().sum()); - this.fieldInfo = fieldInfo; - this.dimension = fieldInfo.getVectorDimension(); - - final String fieldName = fieldInfo.name; - - // Count total vectors, collect readers and identify leading reader, collect base ordinals to - // later be used to build the mapping - // between global ordinals and global lucene doc ids - int totalVectorsCount = 0; - int totalLiveVectorsCount = 0; - int pqReaderIndex = -1; - ProductQuantization pq = null; - int vectorsCountInLeadingReader = -1; - this.readers = mergeState.knnVectorsReaders.clone(); - final MergeState.DocMap[] docMaps = mergeState.docMaps.clone(); - final Bits[] liveDocs = mergeState.liveDocs.clone(); - final int[] baseOrds = new int[mergeState.knnVectorsReaders.length]; - - // Find the leading reader, count the total number of live vectors, and the base ordinals for - // each reader - for (int i = 0; i < mergeState.knnVectorsReaders.length; i++) { - FieldInfos fieldInfos = mergeState.fieldInfos[i]; - baseOrds[i] = totalVectorsCount; - if (MergedVectorValues.hasVectorValues(fieldInfos, fieldName)) { - KnnVectorsReader reader = mergeState.knnVectorsReaders[i].unwrapReaderForField(fieldName); - if (reader != null) { - FloatVectorValues values = reader.getFloatVectorValues(fieldName); - if (values != null) { - int vectorCountInReader = values.size(); - int liveVectorCountInReader = 0; - KnnVectorValues.DocIndexIterator it = values.iterator(); - while (it.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { - if (liveDocs[i] == null || liveDocs[i].get(it.docID())) { - liveVectorCountInReader++; - } - } - if (reader instanceof JVectorReader jVectorReader - && liveVectorCountInReader >= vectorsCountInLeadingReader) { - vectorsCountInLeadingReader = liveVectorCountInReader; - final var maybeNewPq = jVectorReader.getProductQuantizationForField(fieldName); - if (maybeNewPq.isPresent()) { - pqReaderIndex = i; - pq = maybeNewPq.get(); - } - } - totalVectorsCount += vectorCountInReader; - totalLiveVectorsCount += liveVectorCountInReader; - assert values.dimension() == dimension; - } + assert values.dimension() == dimension; + subs.add(new SubFloatVectors(mergeState.docMaps[i], i, values)); + vectors[i] = values; + } + + // These arrays may be larger than strictly necessary if there are deleted docs/missing fields + final int totalMaxDocs = Arrays.stream(mergeState.maxDocs).reduce(0, Math::addExact); + final int[] liveDocCounts = new int[mergeCount]; + final DocsWithFieldSet docIds = new DocsWithFieldSet(); + final int[] ordToReaderIndex = new int[totalMaxDocs]; + final int[] ordToReaderOrd = new int[totalMaxDocs]; + + // Construct ordinal mappings for the new graph + int ord = 0; + final var docIdMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); + for (var sub = docIdMerger.next(); sub != null; sub = docIdMerger.next()) { + final int readerIndex = sub.readerIndex; + liveDocCounts[readerIndex] += 1; + docIds.add(sub.mappedDocID); + ordToReaderIndex[ord] = sub.readerIndex; + ordToReaderOrd[ord] = sub.index(); + ord += 1; + } + + // Make a RandomAccessVectorValues instance using the new graph ordinals + final int totalLiveDocsCount = ord; + final var ravv = + new RandomAccessMergedFloatVectorValues( + totalLiveDocsCount, + 
dimension, + vectors, + i -> ordToReaderIndex[i], + i -> ordToReaderOrd[i]); + + // Find the largest quantized reader to re-use its PQ codebook, if possible + int largestQuantizedReaderIndex = 0; + ProductQuantization pq = null; + for (int i = 0; i < liveDocCounts.length; ++i) { + if (liveDocCounts[i] > liveDocCounts[largestQuantizedReaderIndex]) { + if (mergeState.knnVectorsReaders[i] instanceof JVectorReader jVectorReader) { + final var maybeNewPq = jVectorReader.getProductQuantizationForField(fieldInfo.name); + if (maybeNewPq.isPresent()) { + largestQuantizedReaderIndex = i; + pq = maybeNewPq.get(); } } } + } - assert (totalVectorsCount <= totalDocsCount) - : "Total number of vectors exceeds the total number of documents"; - assert (totalLiveVectorsCount <= totalVectorsCount) - : "Total number of live vectors exceeds the total number of vectors"; - assert (dimension > 0) : "No vectors found for field " + fieldName; - - this.pq = pq; - this.pqReaderIndex = pqReaderIndex; - this.size = totalVectorsCount; - this.perReaderFloatVectorValues = new FloatVectorValues[readers.length]; - - // Build mapping from global ordinal to [readerIndex, readerOrd] - this.ravvOrdToReaderMapping = new int[totalDocsCount][2]; - - int documentsIterated = 0; - - // Will be used to build the new graphNodeIdToDocMap with the new graph node id to docId - // mapping. - // This mapping should not be used to access the vectors at any time during construction, but - // only after the merge is complete - // and the new segment is created and used by searchers. - final int[] graphNodeIdToDocIds = new int[totalLiveVectorsCount]; - this.graphNodeIdsToRavvOrds = new int[totalLiveVectorsCount]; - - int graphNodeId = 0; - // Build a new graph from scratch and compact the graph node ids - for (int readerIdx = 0; readerIdx < readers.length; readerIdx++) { - if (readers[readerIdx] == null) { + // Perform PQ if applicable + final PQVectors pqVectors; + if (pq != null) { + // Refine the leadingCompressor with the remaining vectors in the merge + ProductQuantization newPq = pq; + for (int i = 0; i < mergeCount; i++) { + if (i == largestQuantizedReaderIndex || vectors[i] == null) { + // Skip the reader associated with the re-used PQ codebook continue; } - final FloatVectorValues values = readers[readerIdx].getFloatVectorValues(fieldName); - if (values == null || values.size() == 0) { - continue; - } - perReaderFloatVectorValues[readerIdx] = values; - // For each vector in this reader - KnnVectorValues.DocIndexIterator it = values.iterator(); - - for (int docId = it.nextDoc(); - docId != DocIdSetIterator.NO_MORE_DOCS; - docId = it.nextDoc()) { - if (docMaps[readerIdx].get(docId) != -1) { - // Mapping from ravv ordinals to [readerIndex, readerOrd] - // Map graph node id to ravv ordinal - // Map graph node id to doc id - final int newGlobalDocId = docMaps[readerIdx].get(docId); - final int ravvLocalOrd = it.index(); - final int ravvGlobalOrd = ravvLocalOrd + baseOrds[readerIdx]; - graphNodeIdToDocIds[graphNodeId] = newGlobalDocId; - graphNodeIdsToRavvOrds[graphNodeId] = ravvGlobalOrd; - graphNodeId++; - ravvOrdToReaderMapping[ravvGlobalOrd][READER_ID] = readerIdx; // Reader index - ravvOrdToReaderMapping[ravvGlobalOrd][READER_ORD] = ravvLocalOrd; // Ordinal in reader - } - - documentsIterated++; - } + final FloatVectorValues values = vectors[i]; + final RandomAccessVectorValues randomAccessVectorValues = + new RandomAccessVectorValuesOverVectorValues(values); + newPq = newPq.refine(randomAccessVectorValues); } + pqVectors = 
newPq.encodeAll(ravv, SIMD_POOL_MERGE); + } else if (ravv.size() >= minimumBatchSizeForQuantization) { + // No pre-existing codebooks, check if we have enough vectors to trigger quantization + pqVectors = getPQVectors(ravv, fieldInfo); + } else { + pqVectors = null; + } + + final BuildScoreProvider buildScoreProvider; + final var similarityFunction = getVectorSimilarityFunction(fieldInfo); + if (pqVectors != null) { + // Re-use PQ codebooks to build a new graph from scratch + buildScoreProvider = BuildScoreProvider.pqBuildScoreProvider(similarityFunction, pqVectors); + // Pre-init the diversity provider here to avoid doing it lazily (as it could block the SIMD + // threads) + buildScoreProvider.diversityProviderFor(0); + } else { + buildScoreProvider = BuildScoreProvider.randomAccessScoreProvider(ravv, similarityFunction); + } + final var graphNodeIdToDocMap = new GraphNodeIdToDocMap(docIds); + final var graph = + getGraph( + buildScoreProvider, + ravv, + fieldInfo, + segmentWriteState.segmentInfo.name, + SIMD_POOL_MERGE); + writeField(fieldInfo, ravv, pqVectors, graphNodeIdToDocMap, graph); + } - if (documentsIterated < totalVectorsCount) { - throw new IllegalStateException( - "More documents were expected than what was found in the readers." - + "Expected at least number of total vectors: " - + totalVectorsCount - + " but found only: " - + documentsIterated - + " documents."); - } + private static final class SubFloatVectors extends DocIDMerger.Sub { + final int readerIndex; + final KnnVectorValues.DocIndexIterator iterator; + int docId = -1; - this.graphNodeIdToDocMap = new GraphNodeIdToDocMap(graphNodeIdToDocIds); - } - - /** - * Merges the float vector values from multiple readers into a unified structure. This process - * includes handling product quantization (PQ) for vector compression, generating ord-to-doc - * mappings, and writing the merged index into a new segment file. - * - *
The method determines if pre-existing product quantization codebooks are available from - * the leading reader. If available, it refines them using remaining vectors from other readers - * in the merge. If no pre-existing codebooks are found and the total vector count meets the - * required minimum threshold, new codebooks and compressed vectors are computed. Otherwise, no - * PQ compression is applied. - * - *
Also, it generates a mapping of ordinals to document IDs by iterating through the provided - * vector data, which is further used to write the field data. - * - *
In the event of no deletes or quantization, the graph construction is done by - * incrementally adding vectors from smaller segments into the largest segment. For all other - * cases, we build a new graph from scratch from all the vectors. - * - *
TODO: Add support for incremental graph building with quantization see issue - * - * @throws IOException if there is an issue during reading or writing vector data. - */ - public void merge() throws IOException { - final RandomAccessVectorValues mapped = - new RemappedRandomAccessVectorValues(this, graphNodeIdsToRavvOrds); - // This section creates the PQVectors to be used for this merge - // Get PQ compressor for leading reader - final String fieldName = fieldInfo.name; - final PQVectors pqVectors; - // Check if the leading reader has pre-existing PQ codebooks and if so, refine them with the - // remaining vectors - if (pq != null) { - // Refine the leadingCompressor with the remaining vectors in the merge - ProductQuantization newPq = pq; - for (int i = 0; i < readers.length; i++) { - if (i == pqReaderIndex) { - // Skip the reader associated with the re-used PQ codebook - continue; - } - final FloatVectorValues values = readers[i].getFloatVectorValues(fieldName); - final RandomAccessVectorValues randomAccessVectorValues = - new RandomAccessVectorValuesOverVectorValues(values); - newPq = newPq.refine(randomAccessVectorValues); - } - pqVectors = newPq.encodeAll(mapped, SIMD_POOL_MERGE); - } else if (mapped.size() >= minimumBatchSizeForQuantization) { - // No pre-existing codebooks, check if we have enough vectors to trigger quantization - pqVectors = getPQVectors(mapped, fieldInfo); - } else { - pqVectors = null; - } + SubFloatVectors(MergeState.DocMap docMap, int readerIndex, FloatVectorValues values) { + super(docMap); + this.readerIndex = readerIndex; + this.iterator = values.iterator(); + } - final BuildScoreProvider buildScoreProvider; - if (pqVectors != null) { - // Re-use PQ codebooks to build a new graph from scratch - buildScoreProvider = - BuildScoreProvider.pqBuildScoreProvider( - getVectorSimilarityFunction(fieldInfo), pqVectors); - // Pre-init the diversity provider here to avoid doing it lazily (as it could block the SIMD - // threads) - buildScoreProvider.diversityProviderFor(0); - } else { - buildScoreProvider = BuildScoreProvider.randomAccessScoreProvider(mapped, getVectorSimilarityFunction(fieldInfo)); - } - final OnHeapGraphIndex graph = - getGraph( - buildScoreProvider, - mapped, - fieldInfo, - segmentWriteState.segmentInfo.name, - SIMD_POOL_MERGE); + @Override + public int nextDoc() throws IOException { + docId = iterator.nextDoc(); + return docId; + } - writeField(fieldInfo, mapped, pqVectors, graphNodeIdToDocMap, graph); + public int index() { + return iterator.index(); + } + } + + private static final class RandomAccessMergedFloatVectorValues + implements RandomAccessVectorValues { + private final int size; + private final int dimension; + private final FloatVectorValues[] vectors; + private final IntUnaryOperator ordToReader; + private final IntUnaryOperator ordToReaderOrd; + + public RandomAccessMergedFloatVectorValues( + int size, + int dimension, + FloatVectorValues[] values, + IntUnaryOperator ordToReader, + IntUnaryOperator ordToReaderOrd) { + this.size = size; + this.dimension = dimension; + this.vectors = values; + this.ordToReader = ordToReader; + this.ordToReaderOrd = ordToReaderOrd; } @Override - public int size() { - return size; + public RandomAccessMergedFloatVectorValues copy() { + throw new UnsupportedOperationException(); } @Override @@ -825,25 +726,34 @@ public int dimension() { } @Override - public VectorFloat getVector(int ord) { - if (ord < 0 || ord >= totalDocsCount) { - throw new IllegalArgumentException("Ordinal out of bounds: " + ord); - } + 
public VectorFloat getVector(int nodeId) { + final var vector = VECTOR_TYPE_SUPPORT.createFloatVector(dimension); + getVectorInto(nodeId, vector, 0); + return vector; + } - final int readerIdx = ravvOrdToReaderMapping[ord][READER_ID]; - final int readerOrd = ravvOrdToReaderMapping[ord][READER_ORD]; + @Override + public void getVectorInto(int node, VectorFloat destinationVector, int offset) { + final FloatVectorValues values = vectors[ordToReader.applyAsInt(node)]; + final int ord = ordToReaderOrd.applyAsInt(node); - // Access to float values is not thread safe - synchronized (perReaderFloatVectorValues[readerIdx]) { - if (perReaderFloatVectorValues[readerIdx] instanceof JVectorFloatVectorValues values) { - return values.vectorFloatValue(readerOrd); + if (values instanceof JVectorFloatVectorValues jVectorValues) { + synchronized (this) { + jVectorValues.getVectorInto(ord, destinationVector, offset); } + } + + synchronized (this) { + final float[] srcVector; try { - return VECTOR_TYPE_SUPPORT.createFloatVector( - perReaderFloatVectorValues[readerIdx].vectorValue(readerOrd)); + srcVector = values.vectorValue(ord); } catch (IOException e) { throw new UncheckedIOException(e); } + + for (int i = 0; i < srcVector.length; ++i) { + destinationVector.set(i + offset, srcVector[i]); + } } } @@ -853,8 +763,8 @@ public boolean isValueShared() { } @Override - public RandomAccessVectorValues copy() { - throw new UnsupportedOperationException("Copy not supported"); + public int size() { + return size; } } From b2e587b7710be330e5209e9aa726589aa4571959 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 5 Nov 2025 16:34:52 +0000 Subject: [PATCH 54/86] Remove synchronized from RandomAccessMergedFloatVectorValues --- .../jvector/JVectorFloatVectorValues.java | 8 ++-- .../sandbox/codecs/jvector/JVectorWriter.java | 38 +++++++++++-------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java index 0adc733f8dc6..ccbe286c776c 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFloatVectorValues.java @@ -35,18 +35,20 @@ public class JVectorFloatVectorValues extends FloatVectorValues { private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport(); + private final OnDiskGraphIndex index; private final OnDiskGraphIndex.View view; private final PQVectors pq; private final VectorSimilarityFunction similarityFunction; private final GraphNodeIdToDocMap graphNodeIdToDocMap; public JVectorFloatVectorValues( - OnDiskGraphIndex onDiskGraphIndex, + OnDiskGraphIndex index, PQVectors pq, VectorSimilarityFunction similarityFunction, GraphNodeIdToDocMap graphNodeIdToDocMap) throws IOException { - this.view = onDiskGraphIndex.getView(); + this.index = index; + this.view = index.getView(); this.pq = pq; this.similarityFunction = similarityFunction; this.graphNodeIdToDocMap = graphNodeIdToDocMap; @@ -94,7 +96,7 @@ public float[] vectorValue(int i) throws IOException { @Override public FloatVectorValues copy() throws IOException { - return this; + return new JVectorFloatVectorValues(index, pq, similarityFunction, graphNodeIdToDocMap); } @Override diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java 
b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 38670eb23157..da6917f66953 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -717,7 +717,18 @@ public RandomAccessMergedFloatVectorValues( @Override public RandomAccessMergedFloatVectorValues copy() { - throw new UnsupportedOperationException(); + final FloatVectorValues[] newVectors = new FloatVectorValues[vectors.length]; + for (int i = 0; i < newVectors.length; ++i) { + if (vectors[i] != null) { + try { + newVectors[i] = vectors[i].copy(); + } catch (IOException e) { + throw new UncheckedIOException(e); + } + } + } + return new RandomAccessMergedFloatVectorValues( + size, dimension, newVectors, ordToReader, ordToReaderOrd); } @Override @@ -738,28 +749,25 @@ public void getVectorInto(int node, VectorFloat destinationVector, int offset final int ord = ordToReaderOrd.applyAsInt(node); if (values instanceof JVectorFloatVectorValues jVectorValues) { - synchronized (this) { - jVectorValues.getVectorInto(ord, destinationVector, offset); - } + jVectorValues.getVectorInto(ord, destinationVector, offset); } - synchronized (this) { - final float[] srcVector; - try { - srcVector = values.vectorValue(ord); - } catch (IOException e) { - throw new UncheckedIOException(e); - } + final float[] srcVector; + try { + srcVector = values.vectorValue(ord); + } catch (IOException e) { + throw new UncheckedIOException(e); + } - for (int i = 0; i < srcVector.length; ++i) { - destinationVector.set(i + offset, srcVector[i]); - } + for (int i = 0; i < srcVector.length; ++i) { + destinationVector.set(i + offset, srcVector[i]); } } @Override public boolean isValueShared() { - return false; + // force thread-local copies + return true; } @Override From 58776c315992d2a9bc9122703f3bdc7e955d1e42 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 5 Nov 2025 20:50:25 +0000 Subject: [PATCH 55/86] Add BaseKnnVectorsFormatTestCase to TestJVectorFormat --- .../codecs/jvector/TestJVectorFormat.java | 56 ++++++++++++++++++- 1 file changed, 54 insertions(+), 2 deletions(-) diff --git a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/TestJVectorFormat.java b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/TestJVectorFormat.java index 0c46a50a8b61..0f1413bbcce9 100644 --- a/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/TestJVectorFormat.java +++ b/lucene/sandbox/src/test/org/apache/lucene/sandbox/codecs/jvector/TestJVectorFormat.java @@ -17,6 +17,10 @@ package org.apache.lucene.sandbox.codecs.jvector; +import static org.apache.lucene.index.VectorEncoding.FLOAT32; +import static org.apache.lucene.index.VectorSimilarityFunction.COSINE; +import static org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT; +import static org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN; import static org.apache.lucene.sandbox.codecs.jvector.JVectorFormat.DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION; import com.carrotsearch.randomizedtesting.ThreadFilter; @@ -33,11 +37,13 @@ import org.apache.lucene.search.*; import org.apache.lucene.store.Directory; import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.tests.index.BaseKnnVectorsFormatTestCase; import org.apache.lucene.tests.index.RandomIndexWriter; import org.apache.lucene.tests.util.LuceneTestCase; import org.apache.lucene.tests.util.TestUtil; import 
org.apache.lucene.util.NamedThreadFactory; import org.junit.Assert; +import org.junit.Ignore; import org.junit.Test; /** Test used specifically for JVector */ @@ -49,10 +55,42 @@ @ThreadLeakFilters( defaultFilters = true, filters = {TestJVectorFormat.ThreadLeakFilter.class}) -public class TestJVectorFormat extends LuceneTestCase { +public class TestJVectorFormat extends BaseKnnVectorsFormatTestCase { + private static final VectorEncoding[] SUPPORTED_ENCODINGS = {FLOAT32}; + private static final VectorSimilarityFunction[] SUPPORTED_FUNCTIONS = { + DOT_PRODUCT, EUCLIDEAN, COSINE + }; private static final String TEST_FIELD = "test_field"; private static final String TEST_ID_FIELD = "id"; + @Override + @Ignore("Does not honor visitedLimit") + public void testSearchWithVisitedLimit() {} + + @Override + @Ignore("Does not support byte vectors") + public void testByteVectorScorerIteration() {} + + @Override + @Ignore("Does not support byte vectors") + public void testMismatchedFields() {} + + @Override + @Ignore("Does not support byte vectors") + public void testSortedIndexBytes() {} + + @Override + @Ignore("Does not support byte vectors") + public void testRandomBytes() {} + + @Override + @Ignore("Does not support byte vectors") + public void testEmptyByteVectorData() {} + + @Override + @Ignore("Does not support byte vectors") + public void testMergingWithDifferentByteKnnFields() {} + /** * Test to verify that the JVector codec is able to successfully search for the nearest neighbours * in the index. Single field is used to store the vectors. All the documents are stored in a @@ -1553,7 +1591,21 @@ static float[][] generateRandomVectors(int count, int dimension) { return vectors; } - private Codec getCodec() { + @Override + protected VectorEncoding randomVectorEncoding() { + return SUPPORTED_ENCODINGS[random().nextInt(SUPPORTED_ENCODINGS.length)]; + } + + @Override + protected VectorSimilarityFunction randomSimilarity() { + return SUPPORTED_FUNCTIONS[random().nextInt(SUPPORTED_FUNCTIONS.length)]; + } + + @Override + protected void assertOffHeapByteSize(LeafReader r, String fieldName) throws IOException {} + + @Override + protected Codec getCodec() { return getCodec(JVectorFormat.DEFAULT_MINIMUM_BATCH_SIZE_FOR_QUANTIZATION); } From fa2908e2ae3374a53dc777d6eb1cb9f80aa29f70 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 19 Nov 2025 17:17:00 +0000 Subject: [PATCH 56/86] Remove explicit SIMD pools --- .../sandbox/codecs/jvector/JVectorFormat.java | 25 ----------- .../sandbox/codecs/jvector/JVectorWriter.java | 44 +++++++++---------- 2 files changed, 20 insertions(+), 49 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java index af727760c375..f917d835f5f0 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java @@ -18,8 +18,6 @@ package org.apache.lucene.sandbox.codecs.jvector; import java.io.IOException; -import java.util.concurrent.ForkJoinPool; -import java.util.concurrent.ForkJoinWorkerThread; import java.util.function.IntUnaryOperator; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; @@ -44,10 +42,6 @@ public class JVectorFormat extends KnnVectorsFormat { public static final float DEFAULT_NEIGHBOR_OVERFLOW = 2f; public static final float DEFAULT_ALPHA = 2f; public static 
final boolean DEFAULT_HIERARCHY_ENABLED = true; - // Unfortunately, this can't be managed yet by the OpenSearch ThreadPool because it's not - // supporting {@link ForkJoinPool} types - public static final ForkJoinPool SIMD_POOL_MERGE = getPhysicalCoreExecutor(); - public static final ForkJoinPool SIMD_POOL_FLUSH = getPhysicalCoreExecutor(); private final int maxConn; private final int beamWidth; @@ -187,23 +181,4 @@ public static int getDefaultNumberOfSubspacesPerVector(int originalDimension) { } return compressedBytes; } - - public static ForkJoinPool getPhysicalCoreExecutor() { - final int estimatedPhysicalCoreCount = - Integer.getInteger( - "jvector.physical_core_count", - Math.max(1, Runtime.getRuntime().availableProcessors() / 2)); - assert estimatedPhysicalCoreCount > 0 - && estimatedPhysicalCoreCount <= Runtime.getRuntime().availableProcessors() - : "Invalid core count: " + estimatedPhysicalCoreCount; - final ForkJoinPool.ForkJoinWorkerThreadFactory factory = - pool -> { - ForkJoinWorkerThread thread = - ForkJoinPool.defaultForkJoinWorkerThreadFactory.newThread(pool); - thread.setPriority(Thread.NORM_PRIORITY - 2); - return thread; - }; - - return new ForkJoinPool(estimatedPhysicalCoreCount, factory, null, true); - } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index da6917f66953..6966a7d1c45a 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -17,10 +17,7 @@ package org.apache.lucene.sandbox.codecs.jvector; -import static io.github.jbellis.jvector.quantization.KMeansPlusPlusClusterer.UNWEIGHTED; import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding; -import static org.apache.lucene.sandbox.codecs.jvector.JVectorFormat.SIMD_POOL_FLUSH; -import static org.apache.lucene.sandbox.codecs.jvector.JVectorFormat.SIMD_POOL_MERGE; import io.github.jbellis.jvector.graph.GraphIndexBuilder; import io.github.jbellis.jvector.graph.ListRandomAccessVectorValues; @@ -43,7 +40,8 @@ import java.util.Arrays; import java.util.Collections; import java.util.List; -import java.util.concurrent.ForkJoinPool; +import java.util.concurrent.CompletableFuture; +import java.util.concurrent.Executor; import java.util.function.IntUnaryOperator; import java.util.stream.IntStream; import org.apache.lucene.codecs.CodecUtil; @@ -242,7 +240,7 @@ public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { randomAccessVectorValues, fieldInfo, segmentWriteState.segmentInfo.name, - SIMD_POOL_FLUSH); + Runnable::run); writeField(field.fieldInfo, randomAccessVectorValues, pqVectors, graphNodeIdToDocMap, graph); } } @@ -356,17 +354,15 @@ private PQVectors getPQVectors( final var numberOfClustersPerSubspace = Math.min(256, randomAccessVectorValues.size()); // number of centroids per // subspace + ProductQuantization pq = ProductQuantization.compute( randomAccessVectorValues, - M, // number of subspaces - numberOfClustersPerSubspace, // number of centroids per subspace - vectorSimilarityFunction == VectorSimilarityFunction.EUCLIDEAN, // center the dataset - UNWEIGHTED, - SIMD_POOL_MERGE, - ForkJoinPool.commonPool()); - - return pq.encodeAll(randomAccessVectorValues, SIMD_POOL_MERGE); + M, + numberOfClustersPerSubspace, + vectorSimilarityFunction == VectorSimilarityFunction.EUCLIDEAN); + + return 
(PQVectors) pq.encodeAll(randomAccessVectorValues); } /// Metadata about the index to be persisted on disk @@ -642,7 +638,8 @@ private void mergeAndWriteField(FieldInfo fieldInfo, MergeState mergeState) thro new RandomAccessVectorValuesOverVectorValues(values); newPq = newPq.refine(randomAccessVectorValues); } - pqVectors = newPq.encodeAll(ravv, SIMD_POOL_MERGE); + newPq.encodeAll(ravv); + pqVectors = (PQVectors) newPq.encodeAll(ravv); } else if (ravv.size() >= minimumBatchSizeForQuantization) { // No pre-existing codebooks, check if we have enough vectors to trigger quantization pqVectors = getPQVectors(ravv, fieldInfo); @@ -668,7 +665,7 @@ private void mergeAndWriteField(FieldInfo fieldInfo, MergeState mergeState) thro ravv, fieldInfo, segmentWriteState.segmentInfo.name, - SIMD_POOL_MERGE); + mergeState.intraMergeTaskExecutor); writeField(fieldInfo, ravv, pqVectors, graphNodeIdToDocMap, graph); } @@ -786,7 +783,7 @@ public OnHeapGraphIndex getGraph( RandomAccessVectorValues randomAccessVectorValues, FieldInfo fieldInfo, String segmentName, - ForkJoinPool SIMD_POOL) { + Executor executor) { final GraphIndexBuilder graphIndexBuilder = new GraphIndexBuilder( buildScoreProvider, @@ -804,17 +801,16 @@ public OnHeapGraphIndex getGraph( * This is the case when we are merging segments and we might have more documents than vectors. */ final OnHeapGraphIndex graphIndex; - var vv = randomAccessVectorValues.threadLocalSupplier(); + final var vv = randomAccessVectorValues.threadLocalSupplier(); // parallel graph construction from the merge documents Ids final int size = randomAccessVectorValues.size(); - SIMD_POOL - .submit( - () -> - IntStream.range(0, size) - .parallel() - .forEach(ord -> graphIndexBuilder.addGraphNode(ord, vv.get().getVector(ord)))) - .join(); + IntStream.range(0, size) + .mapToObj( + ord -> + CompletableFuture.runAsync( + () -> graphIndexBuilder.addGraphNode(ord, vv.get().getVector(ord)), executor)) + .forEach(CompletableFuture::join); graphIndexBuilder.cleanup(); graphIndex = (OnHeapGraphIndex) graphIndexBuilder.getGraph(); From 3ad86d8514c8dbf6f11ab9a64b6e8c4e8ce51f2c Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Wed, 19 Nov 2025 18:56:03 +0000 Subject: [PATCH 57/86] Replace VectorSimilarityMapper with simple switch --- .../sandbox/codecs/jvector/JVectorFormat.java | 20 +++++++ .../sandbox/codecs/jvector/JVectorReader.java | 57 +------------------ .../sandbox/codecs/jvector/JVectorWriter.java | 45 ++++++--------- 3 files changed, 38 insertions(+), 84 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java index f917d835f5f0..25b0b3da6d5c 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java @@ -181,4 +181,24 @@ public static int getDefaultNumberOfSubspacesPerVector(int originalDimension) { } return compressedBytes; } + + static io.github.jbellis.jvector.vector.VectorSimilarityFunction toJVectorSimilarity( + final org.apache.lucene.index.VectorSimilarityFunction luceneFunction) { + return switch (luceneFunction) { + case COSINE -> io.github.jbellis.jvector.vector.VectorSimilarityFunction.COSINE; + case DOT_PRODUCT -> io.github.jbellis.jvector.vector.VectorSimilarityFunction.DOT_PRODUCT; + case EUCLIDEAN -> io.github.jbellis.jvector.vector.VectorSimilarityFunction.EUCLIDEAN; + case MAXIMUM_INNER_PRODUCT -> 
+ throw new UnsupportedOperationException("JVector does not support MAXIMUM_INNER_PRODUCT"); + }; + } + + static org.apache.lucene.index.VectorSimilarityFunction toLuceneSimilarity( + final io.github.jbellis.jvector.vector.VectorSimilarityFunction jVectorFunction) { + return switch (jVectorFunction) { + case COSINE -> org.apache.lucene.index.VectorSimilarityFunction.COSINE; + case DOT_PRODUCT -> org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT; + case EUCLIDEAN -> org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN; + }; + } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index ed2564130dad..3dc056096ac0 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -33,7 +33,6 @@ import java.io.Closeable; import java.io.IOException; import java.util.HashMap; -import java.util.List; import java.util.Map; import java.util.Optional; import org.apache.lucene.codecs.CodecUtil; @@ -264,9 +263,7 @@ class FieldEntry implements Closeable { public FieldEntry( FieldInfo fieldInfo, JVectorWriter.VectorIndexFieldMetadata vectorIndexFieldMetadata) throws IOException { - this.similarityFunction = - VectorSimilarityMapper.ordToDistFunc( - vectorIndexFieldMetadata.vectorSimilarityFunction.ordinal()); + this.similarityFunction = vectorIndexFieldMetadata.vectorSimilarityFunction; this.vectorDimension = vectorIndexFieldMetadata.vectorDimension; this.vectorIndexOffset = vectorIndexFieldMetadata.vectorIndexOffset; this.vectorIndexLength = vectorIndexFieldMetadata.vectorIndexLength; @@ -325,56 +322,4 @@ public void close() throws IOException { } } } - - /** Utility class to map between Lucene and jVector similarity functions and metadata ordinals. */ - public static class VectorSimilarityMapper { - /** - * List of vector similarity functions supported by jVector library The similarity functions orders - * matter in this list because it is later used to resolve the similarity function by ordinal. 
- */ - public static final List JVECTOR_SUPPORTED_SIMILARITY_FUNCTIONS = - List.of( - VectorSimilarityFunction.EUCLIDEAN, - VectorSimilarityFunction.DOT_PRODUCT, - VectorSimilarityFunction.COSINE); - - public static final Map< - org.apache.lucene.index.VectorSimilarityFunction, VectorSimilarityFunction> - LUCENE_TO_JVECTOR_MAP = - Map.of( - org.apache.lucene.index.VectorSimilarityFunction.EUCLIDEAN, - VectorSimilarityFunction.EUCLIDEAN, - org.apache.lucene.index.VectorSimilarityFunction.DOT_PRODUCT, - VectorSimilarityFunction.DOT_PRODUCT, - org.apache.lucene.index.VectorSimilarityFunction.COSINE, - VectorSimilarityFunction.COSINE); - - public static int distFuncToOrd(org.apache.lucene.index.VectorSimilarityFunction func) { - if (LUCENE_TO_JVECTOR_MAP.containsKey(func)) { - return JVECTOR_SUPPORTED_SIMILARITY_FUNCTIONS.indexOf(LUCENE_TO_JVECTOR_MAP.get(func)); - } - - throw new IllegalArgumentException("invalid distance function: " + func); - } - - public static VectorSimilarityFunction ordToDistFunc(int ord) { - return JVECTOR_SUPPORTED_SIMILARITY_FUNCTIONS.get(ord); - } - - public static org.apache.lucene.index.VectorSimilarityFunction ordToLuceneDistFunc(int ord) { - if (ord < 0 || ord >= JVECTOR_SUPPORTED_SIMILARITY_FUNCTIONS.size()) { - throw new IllegalArgumentException("Invalid ordinal: " + ord); - } - VectorSimilarityFunction jvectorFunc = JVECTOR_SUPPORTED_SIMILARITY_FUNCTIONS.get(ord); - for (Map.Entry - entry : LUCENE_TO_JVECTOR_MAP.entrySet()) { - if (entry.getValue().equals(jvectorFunc)) { - return entry.getKey(); - } - } - throw new IllegalStateException( - "No matching Lucene VectorSimilarityFunction found for ordinal: " + ord); - } - } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 6966a7d1c45a..e4d2fe441b62 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -30,6 +30,7 @@ import io.github.jbellis.jvector.graph.similarity.BuildScoreProvider; import io.github.jbellis.jvector.quantization.PQVectors; import io.github.jbellis.jvector.quantization.ProductQuantization; +import io.github.jbellis.jvector.vector.VectorSimilarityFunction; import io.github.jbellis.jvector.vector.VectorizationProvider; import io.github.jbellis.jvector.vector.types.VectorFloat; import io.github.jbellis.jvector.vector.types.VectorTypeSupport; @@ -57,7 +58,6 @@ import org.apache.lucene.index.SegmentWriteState; import org.apache.lucene.index.Sorter; import org.apache.lucene.index.VectorEncoding; -import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; import org.apache.lucene.util.IOUtils; @@ -224,13 +224,15 @@ public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { pqVectors = getPQVectors(randomAccessVectorValues, fieldInfo); buildScoreProvider = BuildScoreProvider.pqBuildScoreProvider( - getVectorSimilarityFunction(fieldInfo), pqVectors); + JVectorFormat.toJVectorSimilarity(fieldInfo.getVectorSimilarityFunction()), + pqVectors); } else { // Not enough vectors for quantization; use full precision vectors instead pqVectors = null; buildScoreProvider = BuildScoreProvider.randomAccessScoreProvider( - randomAccessVectorValues, getVectorSimilarityFunction(fieldInfo)); + randomAccessVectorValues, + 
JVectorFormat.toJVectorSimilarity(fieldInfo.getVectorSimilarityFunction())); } final GraphNodeIdToDocMap graphNodeIdToDocMap = field.createGraphNodeIdToDocMap(); @@ -295,7 +297,7 @@ private VectorIndexFieldMetadata writeGraph( return new VectorIndexFieldMetadata( fieldInfo.number, fieldInfo.getVectorEncoding(), - fieldInfo.getVectorSimilarityFunction(), + JVectorFormat.toJVectorSimilarity(fieldInfo.getVectorSimilarityFunction()), randomAccessVectorValues.dimension(), 0, 0, @@ -333,7 +335,7 @@ private VectorIndexFieldMetadata writeGraph( return new VectorIndexFieldMetadata( fieldInfo.number, fieldInfo.getVectorEncoding(), - fieldInfo.getVectorSimilarityFunction(), + JVectorFormat.toJVectorSimilarity(fieldInfo.getVectorSimilarityFunction()), randomAccessVectorValues.dimension(), startOffset, endGraphOffset - startOffset, @@ -347,8 +349,11 @@ private VectorIndexFieldMetadata writeGraph( private PQVectors getPQVectors( RandomAccessVectorValues randomAccessVectorValues, FieldInfo fieldInfo) throws IOException { - final VectorSimilarityFunction vectorSimilarityFunction = - fieldInfo.getVectorSimilarityFunction(); + final boolean globallyCenter = + switch (fieldInfo.getVectorSimilarityFunction()) { + case EUCLIDEAN -> true; + case COSINE, DOT_PRODUCT, MAXIMUM_INNER_PRODUCT -> false; + }; final int M = numberOfSubspacesPerVectorSupplier.applyAsInt(randomAccessVectorValues.dimension()); final var numberOfClustersPerSubspace = @@ -357,10 +362,7 @@ private PQVectors getPQVectors( ProductQuantization pq = ProductQuantization.compute( - randomAccessVectorValues, - M, - numberOfClustersPerSubspace, - vectorSimilarityFunction == VectorSimilarityFunction.EUCLIDEAN); + randomAccessVectorValues, M, numberOfClustersPerSubspace, globallyCenter); return (PQVectors) pq.encodeAll(randomAccessVectorValues); } @@ -404,7 +406,7 @@ public VectorIndexFieldMetadata( public void toOutput(IndexOutput out) throws IOException { out.writeInt(fieldNumber); out.writeInt(vectorEncoding.ordinal()); - out.writeInt(JVectorReader.VectorSimilarityMapper.distFuncToOrd(vectorSimilarityFunction)); + out.writeInt(vectorSimilarityFunction.ordinal()); out.writeVInt(vectorDimension); out.writeVLong(vectorIndexOffset); out.writeVLong(vectorIndexLength); @@ -417,8 +419,7 @@ public void toOutput(IndexOutput out) throws IOException { public VectorIndexFieldMetadata(IndexInput in) throws IOException { this.fieldNumber = in.readInt(); this.vectorEncoding = readVectorEncoding(in); - this.vectorSimilarityFunction = - JVectorReader.VectorSimilarityMapper.ordToLuceneDistFunc(in.readInt()); + this.vectorSimilarityFunction = VectorSimilarityFunction.values()[in.readInt()]; this.vectorDimension = in.readVInt(); this.vectorIndexOffset = in.readVLong(); this.vectorIndexLength = in.readVLong(); @@ -540,19 +541,6 @@ public long ramBytesUsed() { } } - static io.github.jbellis.jvector.vector.VectorSimilarityFunction getVectorSimilarityFunction( - FieldInfo fieldInfo) { - return switch (fieldInfo.getVectorSimilarityFunction()) { - case EUCLIDEAN -> io.github.jbellis.jvector.vector.VectorSimilarityFunction.EUCLIDEAN; - case COSINE -> io.github.jbellis.jvector.vector.VectorSimilarityFunction.COSINE; - case DOT_PRODUCT -> io.github.jbellis.jvector.vector.VectorSimilarityFunction.DOT_PRODUCT; - // $CASES-OMITTED$ - default -> - throw new IllegalArgumentException( - "Unsupported similarity function: " + fieldInfo.getVectorSimilarityFunction()); - }; - } - private void mergeAndWriteField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { assert 
fieldInfo.hasVectorValues(); final int dimension = fieldInfo.getVectorDimension(); @@ -648,7 +636,8 @@ private void mergeAndWriteField(FieldInfo fieldInfo, MergeState mergeState) thro } final BuildScoreProvider buildScoreProvider; - final var similarityFunction = getVectorSimilarityFunction(fieldInfo); + final var similarityFunction = + JVectorFormat.toJVectorSimilarity(fieldInfo.getVectorSimilarityFunction()); if (pqVectors != null) { // Re-use PQ codebooks to build a new graph from scratch buildScoreProvider = BuildScoreProvider.pqBuildScoreProvider(similarityFunction, pqVectors); From b5255b371e3459ce608ddf7752b0300724dc25da Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Thu, 20 Nov 2025 17:45:13 +0000 Subject: [PATCH 58/86] fixup! Remove explicit SIMD pools --- .../apache/lucene/sandbox/codecs/jvector/JVectorWriter.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index e4d2fe441b62..3461e9c4aee3 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -626,7 +626,6 @@ private void mergeAndWriteField(FieldInfo fieldInfo, MergeState mergeState) thro new RandomAccessVectorValuesOverVectorValues(values); newPq = newPq.refine(randomAccessVectorValues); } - newPq.encodeAll(ravv); pqVectors = (PQVectors) newPq.encodeAll(ravv); } else if (ravv.size() >= minimumBatchSizeForQuantization) { // No pre-existing codebooks, check if we have enough vectors to trigger quantization @@ -799,7 +798,8 @@ public OnHeapGraphIndex getGraph( ord -> CompletableFuture.runAsync( () -> graphIndexBuilder.addGraphNode(ord, vv.get().getVector(ord)), executor)) - .forEach(CompletableFuture::join); + .reduce((a, b) -> a.runAfterBoth(b, () -> {})) + .ifPresent(CompletableFuture::join); graphIndexBuilder.cleanup(); graphIndex = (OnHeapGraphIndex) graphIndexBuilder.getGraph(); From 5e2a8a0fbbedc6254f47ef8030ddd1218657165d Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Thu, 20 Nov 2025 20:45:24 +0000 Subject: [PATCH 59/86] Make RandomAccessVectorValuesOverVectorValues thread-safe --- .../sandbox/codecs/jvector/JVectorWriter.java | 22 +++++++++---------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 3461e9c4aee3..7274c660755a 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -807,8 +807,6 @@ public OnHeapGraphIndex getGraph( } static class RandomAccessVectorValuesOverVectorValues implements RandomAccessVectorValues { - private final VectorTypeSupport VECTOR_TYPE_SUPPORT = - VectorizationProvider.getInstance().getVectorTypeSupport(); private final FloatVectorValues values; public RandomAccessVectorValuesOverVectorValues(FloatVectorValues values) { @@ -828,26 +826,26 @@ public int dimension() { @Override public VectorFloat getVector(int nodeId) { try { - // Access to float values is not thread safe - synchronized (this) { - final float[] vector = values.vectorValue(nodeId); - final float[] copy = new float[vector.length]; - System.arraycopy(vector, 0, copy, 0, 
vector.length); - return VECTOR_TYPE_SUPPORT.createFloatVector(copy); - } + final float[] vector = values.vectorValue(nodeId); + return VECTOR_TYPE_SUPPORT.createFloatVector(Arrays.copyOf(vector, vector.length)); } catch (IOException e) { - throw new RuntimeException(e); + throw new UncheckedIOException(e); } } @Override public boolean isValueShared() { - return false; + // Access to float values is not thread safe + return true; } @Override public RandomAccessVectorValues copy() { - throw new UnsupportedOperationException("Copy not supported"); + try { + return new RandomAccessVectorValuesOverVectorValues(values.copy()); + } catch (IOException e) { + throw new UncheckedIOException(e); + } } } } From d3aeeb5e1d05a664120750aa8756f36af38d7604 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Fri, 21 Nov 2025 20:21:36 +0000 Subject: [PATCH 60/86] Use OrdinalMapper for sorting index --- .../sandbox/codecs/jvector/JVectorWriter.java | 97 +++++++++++-------- 1 file changed, 56 insertions(+), 41 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 7274c660755a..9fba3e6fc490 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -24,6 +24,7 @@ import io.github.jbellis.jvector.graph.OnHeapGraphIndex; import io.github.jbellis.jvector.graph.RandomAccessVectorValues; import io.github.jbellis.jvector.graph.disk.OnDiskSequentialGraphIndexWriter; +import io.github.jbellis.jvector.graph.disk.OrdinalMapper; import io.github.jbellis.jvector.graph.disk.feature.Feature; import io.github.jbellis.jvector.graph.disk.feature.FeatureId; import io.github.jbellis.jvector.graph.disk.feature.InlineVectors; @@ -39,7 +40,6 @@ import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.List; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executor; @@ -92,8 +92,7 @@ * jVector ordinals and the new Lucene document IDs. This is achieved by keeping checkpoints of the * {@link GraphNodeIdToDocMap} class in the index metadata and allowing us to update the mapping as * needed across merges by constructing a new mapping from the previous mapping and the {@link - * org.apache.lucene.index.MergeState.DocMap} provided in the {@link MergeState}. And across sorts - * with {@link FieldWriter#applySort(Sorter.DocMap)} during flushes. + * org.apache.lucene.index.MergeState.DocMap} provided in the {@link MergeState}. 
*/ public class JVectorWriter extends KnnVectorsWriter { private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = @@ -213,8 +212,19 @@ public void mergeOneField(FieldInfo fieldInfo, MergeState mergeState) throws IOE @Override public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { for (FieldWriter field : fields) { + final DocsWithFieldSet newDocIds; + final OrdinalMapper ordinalMapper; if (sortMap != null) { - field.applySort(sortMap); + assert field.docIds.cardinality() <= sortMap.size(); + final int size = field.docIds.cardinality(); + final int[] oldToNew = new int[size]; + final int[] newToOld = new int[size]; + newDocIds = new DocsWithFieldSet(); + KnnVectorsWriter.mapOldOrdToNewOrd(field.docIds, sortMap, oldToNew, newToOld, newDocIds); + ordinalMapper = new ArrayOrdinalMapper(size - 1, oldToNew, newToOld); + } else { + newDocIds = field.docIds; + ordinalMapper = null; } final RandomAccessVectorValues randomAccessVectorValues = field.toRandomAccessVectorValues(); final BuildScoreProvider buildScoreProvider; @@ -235,7 +245,7 @@ public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { JVectorFormat.toJVectorSimilarity(fieldInfo.getVectorSimilarityFunction())); } - final GraphNodeIdToDocMap graphNodeIdToDocMap = field.createGraphNodeIdToDocMap(); + final GraphNodeIdToDocMap graphNodeIdToDocMap = new GraphNodeIdToDocMap(newDocIds); OnHeapGraphIndex graph = getGraph( buildScoreProvider, @@ -243,7 +253,31 @@ public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { fieldInfo, segmentWriteState.segmentInfo.name, Runnable::run); - writeField(field.fieldInfo, randomAccessVectorValues, pqVectors, graphNodeIdToDocMap, graph); + writeField( + field.fieldInfo, + randomAccessVectorValues, + pqVectors, + ordinalMapper, + graphNodeIdToDocMap, + graph); + } + } + + private record ArrayOrdinalMapper(int maxOrdinal, int[] oldToNew, int[] newToOld) + implements OrdinalMapper { + @Override + public int maxOrdinal() { + return maxOrdinal; + } + + @Override + public int oldToNew(int oldOrdinal) { + return oldToNew[oldOrdinal]; + } + + @Override + public int newToOld(int newOrdinal) { + return newToOld[newOrdinal]; } } @@ -251,11 +285,18 @@ private void writeField( FieldInfo fieldInfo, RandomAccessVectorValues randomAccessVectorValues, PQVectors pqVectors, + OrdinalMapper ordinalMapper, GraphNodeIdToDocMap graphNodeIdToDocMap, OnHeapGraphIndex graph) throws IOException { final var vectorIndexFieldMetadata = - writeGraph(graph, randomAccessVectorValues, fieldInfo, pqVectors, graphNodeIdToDocMap); + writeGraph( + graph, + randomAccessVectorValues, + fieldInfo, + pqVectors, + ordinalMapper, + graphNodeIdToDocMap); meta.writeInt(fieldInfo.number); vectorIndexFieldMetadata.toOutput(meta); } @@ -274,6 +315,7 @@ private VectorIndexFieldMetadata writeGraph( RandomAccessVectorValues randomAccessVectorValues, FieldInfo fieldInfo, PQVectors pqVectors, + OrdinalMapper ordinalMapper, GraphNodeIdToDocMap graphNodeIdToDocMap) throws IOException { // field data file, which contains the graph @@ -306,10 +348,13 @@ private VectorIndexFieldMetadata writeGraph( degreeOverflow, graphNodeIdToDocMap); } - try (var writer = + final var writerBuilder = new OnDiskSequentialGraphIndexWriter.Builder(graph, jVectorIndexWriter) - .with(new InlineVectors(randomAccessVectorValues.dimension())) - .build()) { + .with(new InlineVectors(randomAccessVectorValues.dimension())); + if (ordinalMapper != null) { + writerBuilder.withMapper(ordinalMapper); + } + try (var writer = 
writerBuilder.build()) { var suppliers = Feature.singleStateFactory( FeatureId.INLINE_VECTORS, @@ -499,40 +544,10 @@ public float[] copyValue(float[] vectorValue) { return vectorValue.clone(); } - public void applySort(Sorter.DocMap sortMap) throws IOException { - // Ensure that all existing docs can be sorted - final int[] oldToNewOrd = new int[vectors.size()]; - final DocsWithFieldSet oldDocIds = docIds; - docIds = new DocsWithFieldSet(); - mapOldOrdToNewOrd(oldDocIds, sortMap, oldToNewOrd, null, docIds); - - // Swap vectors into their new ordinals - for (int oldOrd = 0; oldOrd < vectors.size(); ++oldOrd) { - final int newOrd = oldToNewOrd[oldOrd]; - if (oldOrd == newOrd) { - continue; - } - - // Swap the element at oldOrd into its position at newOrd and update the index mapping - Collections.swap(vectors, oldOrd, newOrd); - oldToNewOrd[oldOrd] = oldToNewOrd[newOrd]; - oldToNewOrd[newOrd] = newOrd; - - // The element at oldOrd may be displaced and need to be swapped again - if (oldToNewOrd[oldOrd] != oldOrd) { - oldOrd -= 1; - } - } - } - public RandomAccessVectorValues toRandomAccessVectorValues() { return new ListRandomAccessVectorValues(vectors, fieldInfo.getVectorDimension()); } - public GraphNodeIdToDocMap createGraphNodeIdToDocMap() { - return new GraphNodeIdToDocMap(docIds); - } - @Override public long ramBytesUsed() { return SHALLOW_SIZE @@ -654,7 +669,7 @@ private void mergeAndWriteField(FieldInfo fieldInfo, MergeState mergeState) thro fieldInfo, segmentWriteState.segmentInfo.name, mergeState.intraMergeTaskExecutor); - writeField(fieldInfo, ravv, pqVectors, graphNodeIdToDocMap, graph); + writeField(fieldInfo, ravv, pqVectors, null, graphNodeIdToDocMap, graph); } private static final class SubFloatVectors extends DocIDMerger.Sub { From d5f8bb259526ba72edb17fa90096dddf53e7e9c8 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Sat, 22 Nov 2025 05:46:00 +0000 Subject: [PATCH 61/86] Don't write metadata for empty graphs --- .../sandbox/codecs/jvector/JVectorReader.java | 28 ++++++++----------- .../sandbox/codecs/jvector/JVectorWriter.java | 22 +++++---------- 2 files changed, 19 insertions(+), 31 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 3dc056096ac0..b006a9bb7c0c 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -274,22 +274,18 @@ public FieldEntry( this.vectorIndexFieldDataFileName = baseDataFileName + "_" + fieldInfo.name + "." 
+ JVectorFormat.VECTOR_INDEX_EXTENSION; - if (vectorIndexLength != 0) { - // For the slice we would like to include the Lucene header, unfortunately, we have to do - // this because jVector use global offsets instead of local offsets - final long sliceLength = - vectorIndexLength - + CodecUtil.indexHeaderLength( - JVectorFormat.VECTOR_INDEX_CODEC_NAME, state.segmentSuffix); - // Load the graph index - this.indexReaderSupplier = - new JVectorRandomAccessReader.Supplier( - directory.openInput(vectorIndexFieldDataFileName, state.context), 0, sliceLength); - this.index = OnDiskGraphIndex.load(indexReaderSupplier, vectorIndexOffset); - } else { - this.indexReaderSupplier = null; - this.index = null; - } + assert vectorIndexLength > 0 : "Read empty JVector graph"; + // For the slice we would like to include the Lucene header, unfortunately, we have to do + // this because jVector use global offsets instead of local offsets + final long sliceLength = + vectorIndexLength + + CodecUtil.indexHeaderLength( + JVectorFormat.VECTOR_INDEX_CODEC_NAME, state.segmentSuffix); + // Load the graph index + this.indexReaderSupplier = + new JVectorRandomAccessReader.Supplier( + directory.openInput(vectorIndexFieldDataFileName, state.context), 0, sliceLength); + this.index = OnDiskGraphIndex.load(indexReaderSupplier, vectorIndexOffset); // If quantized load the compressed product quantized vectors with their codebooks if (pqCodebooksAndVectorsLength > 0) { diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 9fba3e6fc490..8ec2332437d8 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -334,20 +334,6 @@ private VectorIndexFieldMetadata writeGraph( segmentWriteState.segmentInfo.getId(), segmentWriteState.segmentSuffix); final long startOffset = indexOutput.getFilePointer(); - if (graph.size() == 0) { - CodecUtil.writeFooter(indexOutput); - return new VectorIndexFieldMetadata( - fieldInfo.number, - fieldInfo.getVectorEncoding(), - JVectorFormat.toJVectorSimilarity(fieldInfo.getVectorSimilarityFunction()), - randomAccessVectorValues.dimension(), - 0, - 0, - 0, - 0, - degreeOverflow, - graphNodeIdToDocMap); - } final var writerBuilder = new OnDiskSequentialGraphIndexWriter.Builder(graph, jVectorIndexWriter) .with(new InlineVectors(randomAccessVectorValues.dimension())); @@ -601,8 +587,13 @@ private void mergeAndWriteField(FieldInfo fieldInfo, MergeState mergeState) thro ord += 1; } - // Make a RandomAccessVectorValues instance using the new graph ordinals final int totalLiveDocsCount = ord; + if (totalLiveDocsCount == 0) { + // Avoid writing an empty graph + return; + } + + // Make a RandomAccessVectorValues instance using the new graph ordinals final var ravv = new RandomAccessMergedFloatVectorValues( totalLiveDocsCount, @@ -787,6 +778,7 @@ public OnHeapGraphIndex getGraph( FieldInfo fieldInfo, String segmentName, Executor executor) { + assert randomAccessVectorValues.size() > 0 : "Cannot build empty graph"; final GraphIndexBuilder graphIndexBuilder = new GraphIndexBuilder( buildScoreProvider, From d7b40bad6a048fb6f4a0b6c7180cbecb20ff2c16 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Sat, 22 Nov 2025 06:03:33 +0000 Subject: [PATCH 62/86] Fix JVector data slicing --- .../jvector/JVectorRandomAccessReader.java | 45 +++---------------- 
.../sandbox/codecs/jvector/JVectorReader.java | 10 ++--- 2 files changed, 11 insertions(+), 44 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java index de87f451f5c8..8d9445314975 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java @@ -137,53 +137,20 @@ public long length() throws IOException { * io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex#load(ReaderSupplier, long)} */ public static class Supplier implements ReaderSupplier { - private final AtomicInteger readerCount = new AtomicInteger(0); - private final IndexInput currentInput; - private final long sliceStartOffset; - private final long sliceLength; - private final ConcurrentHashMap readers = - new ConcurrentHashMap<>(); - - public Supplier(IndexInput indexInput) throws IOException { - this( - indexInput, - indexInput.getFilePointer(), - indexInput.length() - indexInput.getFilePointer()); - } + private final IndexInput input; - public Supplier(IndexInput indexInput, long sliceStartOffset, long sliceLength) - throws IOException { - this.currentInput = indexInput; - this.sliceStartOffset = sliceStartOffset; - this.sliceLength = sliceLength; + public Supplier(IndexInput input) { + this.input = input; } @Override - public RandomAccessReader get() throws IOException { - synchronized (this) { - final IndexInput input = - currentInput - .slice("Input Slice for the jVector graph or PQ", sliceStartOffset, sliceLength) - .clone(); - - var reader = new JVectorRandomAccessReader(input); - int readerId = readerCount.getAndIncrement(); - readers.put(readerId, reader); - return reader; - } + public synchronized RandomAccessReader get() throws IOException { + return new JVectorRandomAccessReader(input.clone()); } @Override public void close() throws IOException { - // Close source of all cloned inputs - IOUtils.closeWhileHandlingException(currentInput); - - // Close all readers - for (RandomAccessReader reader : readers.values()) { - IOUtils.closeWhileHandlingException(reader::close); - } - readers.clear(); - readerCount.set(0); + // Cloned inputs do not need to be closed } } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index b006a9bb7c0c..ff6394b6fbe5 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -255,6 +255,7 @@ class FieldEntry implements Closeable { private final long pqCodebooksAndVectorsOffset; private final String vectorIndexFieldDataFileName; private final GraphNodeIdToDocMap graphNodeIdToDocMap; + private final IndexInput data; private final ReaderSupplier indexReaderSupplier; private final ReaderSupplier pqCodebooksReaderSupplier; private final OnDiskGraphIndex index; @@ -275,6 +276,7 @@ public FieldEntry( baseDataFileName + "_" + fieldInfo.name + "." 
+ JVectorFormat.VECTOR_INDEX_EXTENSION; assert vectorIndexLength > 0 : "Read empty JVector graph"; + this.data = directory.openInput(vectorIndexFieldDataFileName, state.context); // For the slice we would like to include the Lucene header, unfortunately, we have to do // this because jVector use global offsets instead of local offsets final long sliceLength = @@ -283,8 +285,7 @@ public FieldEntry( JVectorFormat.VECTOR_INDEX_CODEC_NAME, state.segmentSuffix); // Load the graph index this.indexReaderSupplier = - new JVectorRandomAccessReader.Supplier( - directory.openInput(vectorIndexFieldDataFileName, state.context), 0, sliceLength); + new JVectorRandomAccessReader.Supplier(data.slice("graph", 0, sliceLength)); this.index = OnDiskGraphIndex.load(indexReaderSupplier, vectorIndexOffset); // If quantized load the compressed product quantized vectors with their codebooks @@ -296,9 +297,7 @@ public FieldEntry( } this.pqCodebooksReaderSupplier = new JVectorRandomAccessReader.Supplier( - directory.openInput(vectorIndexFieldDataFileName, IOContext.READONCE), - pqCodebooksAndVectorsOffset, - pqCodebooksAndVectorsLength); + data.slice("pq", pqCodebooksAndVectorsOffset, pqCodebooksAndVectorsLength)); try (final var randomAccessReader = pqCodebooksReaderSupplier.get()) { this.pqVectors = PQVectors.load(randomAccessReader); } @@ -310,6 +309,7 @@ public FieldEntry( @Override public void close() throws IOException { + IOUtils.close(data); if (indexReaderSupplier != null) { IOUtils.close(indexReaderSupplier::close); } From 7e21ddfd4fbeb745943ab72b46e0547706e6ff0b Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Sat, 22 Nov 2025 06:07:36 +0000 Subject: [PATCH 63/86] Fix JVectorRandomAccessReader imports --- .../sandbox/codecs/jvector/JVectorRandomAccessReader.java | 3 --- 1 file changed, 3 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java index 8d9445314975..d7622ebc85a8 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java @@ -23,10 +23,7 @@ import java.io.IOException; import java.nio.ByteBuffer; import java.nio.FloatBuffer; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.atomic.AtomicInteger; import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.IOUtils; /// Implements JVector reader capabilities over a Lucene IndexInput public class JVectorRandomAccessReader implements RandomAccessReader { From 3ff7e4c15974d43ffd440fc0312c7db53fec0563 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Sat, 22 Nov 2025 06:08:57 +0000 Subject: [PATCH 64/86] Remove pqCodebooksReaderSupplier --- .../lucene/sandbox/codecs/jvector/JVectorReader.java | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index ff6394b6fbe5..80b00fce1b6b 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -257,7 +257,6 @@ class FieldEntry implements Closeable { private final GraphNodeIdToDocMap graphNodeIdToDocMap; private final IndexInput data; 
private final ReaderSupplier indexReaderSupplier; - private final ReaderSupplier pqCodebooksReaderSupplier; private final OnDiskGraphIndex index; private final PQVectors pqVectors; // The product quantized vectors with their codebooks @@ -295,14 +294,12 @@ public FieldEntry( throw new IllegalArgumentException( "pqCodebooksAndVectorsOffset must be greater than vectorIndexOffset"); } - this.pqCodebooksReaderSupplier = - new JVectorRandomAccessReader.Supplier( - data.slice("pq", pqCodebooksAndVectorsOffset, pqCodebooksAndVectorsLength)); - try (final var randomAccessReader = pqCodebooksReaderSupplier.get()) { + final var pqSlice = + data.slice("pq", pqCodebooksAndVectorsOffset, pqCodebooksAndVectorsLength); + try (final var randomAccessReader = new JVectorRandomAccessReader(pqSlice)) { this.pqVectors = PQVectors.load(randomAccessReader); } } else { - this.pqCodebooksReaderSupplier = null; this.pqVectors = null; } } @@ -313,9 +310,6 @@ public void close() throws IOException { if (indexReaderSupplier != null) { IOUtils.close(indexReaderSupplier::close); } - if (pqCodebooksReaderSupplier != null) { - IOUtils.close(pqCodebooksReaderSupplier::close); - } } } } From 44847ec3f1287728f45f7e9c754f09615b7a1b55 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Sat, 22 Nov 2025 06:12:14 +0000 Subject: [PATCH 65/86] Remove indexReaderSupplier --- .../lucene/sandbox/codecs/jvector/JVectorReader.java | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 80b00fce1b6b..3a3d1230b93f 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -17,7 +17,6 @@ package org.apache.lucene.sandbox.codecs.jvector; -import io.github.jbellis.jvector.disk.ReaderSupplier; import io.github.jbellis.jvector.graph.GraphSearcher; import io.github.jbellis.jvector.graph.SearchResult; import io.github.jbellis.jvector.graph.disk.OnDiskGraphIndex; @@ -256,7 +255,6 @@ class FieldEntry implements Closeable { private final String vectorIndexFieldDataFileName; private final GraphNodeIdToDocMap graphNodeIdToDocMap; private final IndexInput data; - private final ReaderSupplier indexReaderSupplier; private final OnDiskGraphIndex index; private final PQVectors pqVectors; // The product quantized vectors with their codebooks @@ -282,8 +280,8 @@ public FieldEntry( vectorIndexLength + CodecUtil.indexHeaderLength( JVectorFormat.VECTOR_INDEX_CODEC_NAME, state.segmentSuffix); - // Load the graph index - this.indexReaderSupplier = + // Load the graph index from cloned slices of data (no need to close) + final var indexReaderSupplier = new JVectorRandomAccessReader.Supplier(data.slice("graph", 0, sliceLength)); this.index = OnDiskGraphIndex.load(indexReaderSupplier, vectorIndexOffset); @@ -307,9 +305,6 @@ public FieldEntry( @Override public void close() throws IOException { IOUtils.close(data); - if (indexReaderSupplier != null) { - IOUtils.close(indexReaderSupplier::close); - } } } } From 8aa2cb175d0b5bd7e51db9c02e843276e9579f71 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Sat, 22 Nov 2025 06:19:12 +0000 Subject: [PATCH 66/86] Remove useless offset/length fields --- .../sandbox/codecs/jvector/JVectorReader.java | 30 ++++++++----------- 1 file changed, 12 insertions(+), 18 deletions(-) diff --git 
a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 3a3d1230b93f..c158f724fc9e 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -248,10 +248,6 @@ private void readFields(ChecksumIndexInput meta) throws IOException { class FieldEntry implements Closeable { private final VectorSimilarityFunction similarityFunction; private final int vectorDimension; - private final long vectorIndexOffset; - private final long vectorIndexLength; - private final long pqCodebooksAndVectorsLength; - private final long pqCodebooksAndVectorsOffset; private final String vectorIndexFieldDataFileName; private final GraphNodeIdToDocMap graphNodeIdToDocMap; private final IndexInput data; @@ -263,37 +259,35 @@ public FieldEntry( throws IOException { this.similarityFunction = vectorIndexFieldMetadata.vectorSimilarityFunction; this.vectorDimension = vectorIndexFieldMetadata.vectorDimension; - this.vectorIndexOffset = vectorIndexFieldMetadata.vectorIndexOffset; - this.vectorIndexLength = vectorIndexFieldMetadata.vectorIndexLength; - this.pqCodebooksAndVectorsLength = vectorIndexFieldMetadata.pqCodebooksAndVectorsLength; - this.pqCodebooksAndVectorsOffset = vectorIndexFieldMetadata.pqCodebooksAndVectorsOffset; this.graphNodeIdToDocMap = vectorIndexFieldMetadata.graphNodeIdToDocMap; - this.vectorIndexFieldDataFileName = baseDataFileName + "_" + fieldInfo.name + "." + JVectorFormat.VECTOR_INDEX_EXTENSION; - assert vectorIndexLength > 0 : "Read empty JVector graph"; + final long graphOffset = vectorIndexFieldMetadata.vectorIndexOffset; + final long graphLength = vectorIndexFieldMetadata.vectorIndexLength; + assert graphLength > 0 : "Read empty JVector graph"; this.data = directory.openInput(vectorIndexFieldDataFileName, state.context); // For the slice we would like to include the Lucene header, unfortunately, we have to do // this because jVector use global offsets instead of local offsets final long sliceLength = - vectorIndexLength + graphLength + CodecUtil.indexHeaderLength( JVectorFormat.VECTOR_INDEX_CODEC_NAME, state.segmentSuffix); // Load the graph index from cloned slices of data (no need to close) final var indexReaderSupplier = new JVectorRandomAccessReader.Supplier(data.slice("graph", 0, sliceLength)); - this.index = OnDiskGraphIndex.load(indexReaderSupplier, vectorIndexOffset); + this.index = OnDiskGraphIndex.load(indexReaderSupplier, graphOffset); // If quantized load the compressed product quantized vectors with their codebooks - if (pqCodebooksAndVectorsLength > 0) { - assert pqCodebooksAndVectorsOffset > 0; - if (pqCodebooksAndVectorsOffset < vectorIndexOffset) { + final long pqOffset = vectorIndexFieldMetadata.pqCodebooksAndVectorsOffset; + final long pqLength = vectorIndexFieldMetadata.pqCodebooksAndVectorsLength; + if (pqLength > 0) { + assert pqOffset > 0; + if (pqOffset < graphOffset) { throw new IllegalArgumentException( - "pqCodebooksAndVectorsOffset must be greater than vectorIndexOffset"); + "pqOffset must be greater than vectorIndexOffset"); } - final var pqSlice = - data.slice("pq", pqCodebooksAndVectorsOffset, pqCodebooksAndVectorsLength); + final var pqSlice = data.slice("pq", pqOffset, pqLength); try (final var randomAccessReader = new JVectorRandomAccessReader(pqSlice)) { this.pqVectors = PQVectors.load(randomAccessReader); } From 
af695713c63543c346b0a543bf5c5c45554fe712 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Sat, 22 Nov 2025 08:24:17 +0000 Subject: [PATCH 67/86] Use proper index slicing --- .../codecs/jvector/JVectorIndexWriter.java | 8 ++++++-- .../sandbox/codecs/jvector/JVectorReader.java | 17 +++++++++-------- .../sandbox/codecs/jvector/JVectorWriter.java | 8 ++++---- 3 files changed, 19 insertions(+), 14 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java index 6483d7c71393..e4a03571f9f3 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java @@ -27,19 +27,23 @@ */ public class JVectorIndexWriter implements IndexWriter { private final IndexOutput indexOutputDelegate; + /// Initial offset of the writer, which will be subtracted from [position()][#position()] to trick + /// JVector into using offsets that work for slices used by the readers. + private final long offset; public JVectorIndexWriter(IndexOutput indexOutputDelegate) { this.indexOutputDelegate = indexOutputDelegate; + this.offset = indexOutputDelegate.getFilePointer(); } @Override public long position() throws IOException { - return indexOutputDelegate.getFilePointer(); + return indexOutputDelegate.getFilePointer() - offset; } @Override public void close() throws IOException { - indexOutputDelegate.close(); + // Let the user close the delegate } @Override diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index c158f724fc9e..67b1117d8f4e 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -267,16 +267,17 @@ public FieldEntry( final long graphLength = vectorIndexFieldMetadata.vectorIndexLength; assert graphLength > 0 : "Read empty JVector graph"; this.data = directory.openInput(vectorIndexFieldDataFileName, state.context); - // For the slice we would like to include the Lucene header, unfortunately, we have to do - // this because jVector use global offsets instead of local offsets - final long sliceLength = - graphLength - + CodecUtil.indexHeaderLength( - JVectorFormat.VECTOR_INDEX_CODEC_NAME, state.segmentSuffix); + CodecUtil.checkIndexHeader( + this.data, + JVectorFormat.VECTOR_INDEX_CODEC_NAME, + JVectorFormat.VERSION_START, + JVectorFormat.VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix); // Load the graph index from cloned slices of data (no need to close) final var indexReaderSupplier = - new JVectorRandomAccessReader.Supplier(data.slice("graph", 0, sliceLength)); - this.index = OnDiskGraphIndex.load(indexReaderSupplier, graphOffset); + new JVectorRandomAccessReader.Supplier(data.slice("graph", graphOffset, graphLength)); + this.index = OnDiskGraphIndex.load(indexReaderSupplier); // If quantized load the compressed product quantized vectors with their codebooks final long pqOffset = vectorIndexFieldMetadata.pqCodebooksAndVectorsOffset; diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 8ec2332437d8..077caa270651 100644 --- 
a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -324,8 +324,7 @@ private VectorIndexFieldMetadata writeGraph( try (IndexOutput indexOutput = segmentWriteState.directory.createOutput( - vectorIndexFieldFileName, segmentWriteState.context); - final var jVectorIndexWriter = new JVectorIndexWriter(indexOutput)) { + vectorIndexFieldFileName, segmentWriteState.context)) { // Header for the field data file CodecUtil.writeIndexHeader( indexOutput, @@ -333,6 +332,7 @@ private VectorIndexFieldMetadata writeGraph( JVectorFormat.VERSION_CURRENT, segmentWriteState.segmentInfo.getId(), segmentWriteState.segmentSuffix); + final var jVectorIndexWriter = new JVectorIndexWriter(indexOutput); final long startOffset = indexOutput.getFilePointer(); final var writerBuilder = new OnDiskSequentialGraphIndexWriter.Builder(graph, jVectorIndexWriter) @@ -346,7 +346,7 @@ private VectorIndexFieldMetadata writeGraph( FeatureId.INLINE_VECTORS, nodeId -> new InlineVectors.State(randomAccessVectorValues.getVector(nodeId))); writer.write(suppliers); - final long endGraphOffset = jVectorIndexWriter.position(); + final long endGraphOffset = indexOutput.getFilePointer(); // If PQ is enabled and we have enough vectors, write the PQ codebooks and compressed // vectors @@ -356,7 +356,7 @@ private VectorIndexFieldMetadata writeGraph( pqOffset = endGraphOffset; // write the compressed vectors and codebooks to disk pqVectors.write(jVectorIndexWriter); - pqLength = jVectorIndexWriter.position() - endGraphOffset; + pqLength = indexOutput.getFilePointer() - endGraphOffset; } else { pqOffset = 0; pqLength = 0; From b03d201af298f38ad4326706a0ed308d23e599b9 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Sat, 22 Nov 2025 08:27:56 +0000 Subject: [PATCH 68/86] Remove segmentName arg from getGraph --- .../sandbox/codecs/jvector/JVectorWriter.java | 17 +++-------------- 1 file changed, 3 insertions(+), 14 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 077caa270651..38f0a70e1b3e 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -246,13 +246,8 @@ public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { } final GraphNodeIdToDocMap graphNodeIdToDocMap = new GraphNodeIdToDocMap(newDocIds); - OnHeapGraphIndex graph = - getGraph( - buildScoreProvider, - randomAccessVectorValues, - fieldInfo, - segmentWriteState.segmentInfo.name, - Runnable::run); + final var graph = + getGraph(buildScoreProvider, randomAccessVectorValues, fieldInfo, Runnable::run); writeField( field.fieldInfo, randomAccessVectorValues, @@ -654,12 +649,7 @@ private void mergeAndWriteField(FieldInfo fieldInfo, MergeState mergeState) thro } final var graphNodeIdToDocMap = new GraphNodeIdToDocMap(docIds); final var graph = - getGraph( - buildScoreProvider, - ravv, - fieldInfo, - segmentWriteState.segmentInfo.name, - mergeState.intraMergeTaskExecutor); + getGraph(buildScoreProvider, ravv, fieldInfo, mergeState.intraMergeTaskExecutor); writeField(fieldInfo, ravv, pqVectors, null, graphNodeIdToDocMap, graph); } @@ -776,7 +766,6 @@ public OnHeapGraphIndex getGraph( BuildScoreProvider buildScoreProvider, RandomAccessVectorValues 
randomAccessVectorValues, FieldInfo fieldInfo, - String segmentName, Executor executor) { assert randomAccessVectorValues.size() > 0 : "Cannot build empty graph"; final GraphIndexBuilder graphIndexBuilder = From 4267ec0e1fa23e03ac1b1462fdceab4233f25306 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Sat, 22 Nov 2025 08:36:31 +0000 Subject: [PATCH 69/86] Improve primary index file handling on write --- .../sandbox/codecs/jvector/JVectorWriter.java | 54 ++++++++----------- 1 file changed, 21 insertions(+), 33 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 38f0a70e1b3e..561a573d4925 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -103,8 +103,7 @@ public class JVectorWriter extends KnnVectorsWriter { private final List fields = new ArrayList<>(); private final IndexOutput meta; - private final IndexOutput vectorIndex; - private final String indexDataFileName; + private final IndexOutput data; private final String baseDataFileName; private final SegmentWriteState segmentWriteState; private final int maxConn; @@ -138,25 +137,16 @@ public JVectorWriter( this.numberOfSubspacesPerVectorSupplier = numberOfSubspacesPerVectorSupplier; this.minimumBatchSizeForQuantization = minimumBatchSizeForQuantization; this.hierarchyEnabled = hierarchyEnabled; - String metaFileName = - IndexFileNames.segmentFileName( - segmentWriteState.segmentInfo.name, - segmentWriteState.segmentSuffix, - JVectorFormat.META_EXTENSION); - - this.indexDataFileName = - IndexFileNames.segmentFileName( - segmentWriteState.segmentInfo.name, - segmentWriteState.segmentSuffix, - JVectorFormat.VECTOR_INDEX_EXTENSION); this.baseDataFileName = segmentWriteState.segmentInfo.name + "_" + segmentWriteState.segmentSuffix; - boolean success = false; try { + final String metaFileName = + IndexFileNames.segmentFileName( + segmentWriteState.segmentInfo.name, + segmentWriteState.segmentSuffix, + JVectorFormat.META_EXTENSION); meta = segmentWriteState.directory.createOutput(metaFileName, segmentWriteState.context); - vectorIndex = - segmentWriteState.directory.createOutput(indexDataFileName, segmentWriteState.context); CodecUtil.writeIndexHeader( meta, JVectorFormat.META_CODEC_NAME, @@ -164,18 +154,21 @@ public JVectorWriter( segmentWriteState.segmentInfo.getId(), segmentWriteState.segmentSuffix); + final String dataFileName = + IndexFileNames.segmentFileName( + segmentWriteState.segmentInfo.name, + segmentWriteState.segmentSuffix, + JVectorFormat.VECTOR_INDEX_EXTENSION); + data = segmentWriteState.directory.createOutput(dataFileName, segmentWriteState.context); CodecUtil.writeIndexHeader( - vectorIndex, + data, JVectorFormat.VECTOR_INDEX_CODEC_NAME, JVectorFormat.VERSION_CURRENT, segmentWriteState.segmentInfo.getId(), segmentWriteState.segmentSuffix); - - success = true; - } finally { - if (!success) { - IOUtils.closeWhileHandlingException(this); - } + } catch (Throwable t) { + IOUtils.closeWhileSuppressingExceptions(t, this); + throw t; } } @@ -463,20 +456,15 @@ public void finish() throws IOException { } finished = true; - if (meta != null) { - // write end of fields marker - meta.writeInt(-1); - CodecUtil.writeFooter(meta); - } - - if (vectorIndex != null) { - CodecUtil.writeFooter(vectorIndex); - } + // write end of fields marker + meta.writeInt(-1); 
+ CodecUtil.writeFooter(meta); + CodecUtil.writeFooter(data); } @Override public void close() throws IOException { - IOUtils.close(meta, vectorIndex); + IOUtils.close(meta, data); } @Override From f516762cc305412b8dac068ba70119034534c0f3 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Sat, 22 Nov 2025 08:43:54 +0000 Subject: [PATCH 70/86] Remove FieldInfos field --- .../apache/lucene/sandbox/codecs/jvector/JVectorReader.java | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 67b1117d8f4e..4243429cdbec 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -49,7 +49,6 @@ public class JVectorReader extends KnnVectorsReader { private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport(); - private final FieldInfos fieldInfos; private final String baseDataFileName; // Maps field name to field entries private final Map fieldEntryMap = new HashMap<>(1); @@ -58,7 +57,6 @@ public class JVectorReader extends KnnVectorsReader { public JVectorReader(SegmentReadState state) throws IOException { this.state = state; - this.fieldInfos = state.fieldInfos; this.baseDataFileName = state.segmentInfo.name + "_" + state.segmentSuffix; final String metaFileName = IndexFileNames.segmentFileName( @@ -73,7 +71,7 @@ public JVectorReader(SegmentReadState state) throws IOException { JVectorFormat.VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); - readFields(meta); + readFields(meta, state.fieldInfos); CodecUtil.checkFooter(meta); success = true; @@ -235,7 +233,7 @@ public void close() throws IOException { fieldEntryMap.clear(); } - private void readFields(ChecksumIndexInput meta) throws IOException { + private void readFields(ChecksumIndexInput meta, FieldInfos fieldInfos) throws IOException { for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) { final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber); // read field number JVectorWriter.VectorIndexFieldMetadata vectorIndexFieldMetadata = From df18dc8cd1f084501da2a47d7da6824fb4eaca3e Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Sat, 22 Nov 2025 08:45:21 +0000 Subject: [PATCH 71/86] Fixup improve missing graph --- .../lucene/sandbox/codecs/jvector/JVectorReader.java | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 4243429cdbec..34782bc6df33 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -96,7 +96,7 @@ public void checkIntegrity() throws IOException { @Override public FloatVectorValues getFloatVectorValues(String field) throws IOException { final FieldEntry fieldEntry = fieldEntryMap.get(field); - if (fieldEntry == null || fieldEntry.index == null) { + if (fieldEntry == null) { return new FloatVectorValues() { @Override public float[] vectorValue(int ord) throws IOException { @@ -110,7 +110,7 @@ public FloatVectorValues copy() throws IOException { @Override public int dimension() { 
- return fieldEntry.vectorDimension; + return 0; } @Override @@ -148,11 +148,6 @@ public Optional getProductQuantizationForField(String field return Optional.of(fieldEntry.pqVectors.getCompressor()); } - public boolean hasIndex(String field) { - final var fieldEntry = fieldEntryMap.get(field); - return fieldEntry != null && fieldEntry.index != null; - } - @Override public void search(String field, float[] target, KnnCollector knnCollector, AcceptDocs acceptDocs) throws IOException { From bbdcee5d711f73bad1e5848ae59c3a3e0eaa9232 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Sat, 22 Nov 2025 09:21:19 +0000 Subject: [PATCH 72/86] Write all fields to the same file --- .../sandbox/codecs/jvector/JVectorReader.java | 113 ++++++++++-------- .../sandbox/codecs/jvector/JVectorWriter.java | 28 +---- 2 files changed, 66 insertions(+), 75 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java index 34782bc6df33..670c45c54447 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorReader.java @@ -31,7 +31,9 @@ import io.github.jbellis.jvector.vector.types.VectorTypeSupport; import java.io.Closeable; import java.io.IOException; +import java.util.ArrayList; import java.util.HashMap; +import java.util.List; import java.util.Map; import java.util.Optional; import org.apache.lucene.codecs.CodecUtil; @@ -49,48 +51,65 @@ public class JVectorReader extends KnnVectorsReader { private static final VectorTypeSupport VECTOR_TYPE_SUPPORT = VectorizationProvider.getInstance().getVectorTypeSupport(); - private final String baseDataFileName; + private final IndexInput data; // Maps field name to field entries - private final Map fieldEntryMap = new HashMap<>(1); - private final Directory directory; - private final SegmentReadState state; + private final Map fieldEntryMap; public JVectorReader(SegmentReadState state) throws IOException { - this.state = state; - this.baseDataFileName = state.segmentInfo.name + "_" + state.segmentSuffix; + final List fieldMetaList = new ArrayList<>(); final String metaFileName = IndexFileNames.segmentFileName( state.segmentInfo.name, state.segmentSuffix, JVectorFormat.META_EXTENSION); - this.directory = state.directory; - boolean success = false; try (ChecksumIndexInput meta = state.directory.openChecksumInput(metaFileName)) { - CodecUtil.checkIndexHeader( - meta, - JVectorFormat.META_CODEC_NAME, + Throwable priorE = null; + try { + CodecUtil.checkIndexHeader( + meta, + JVectorFormat.META_CODEC_NAME, + JVectorFormat.VERSION_START, + JVectorFormat.VERSION_CURRENT, + state.segmentInfo.getId(), + state.segmentSuffix); + + JVectorWriter.VectorIndexFieldMetadata fieldMeta; + while ((fieldMeta = parseNextField(meta, state.fieldInfos)) != null) { + fieldMetaList.add(fieldMeta); + } + } catch (Throwable t) { + priorE = t; + } finally { + CodecUtil.checkFooter(meta, priorE); + } + + final String dataFileName = + IndexFileNames.segmentFileName( + state.segmentInfo.name, state.segmentSuffix, JVectorFormat.VECTOR_INDEX_EXTENSION); + this.data = + state.directory.openInput( + dataFileName, state.context.withHints(FileTypeHint.DATA, DataAccessHint.RANDOM)); + + + CodecUtil.checkHeader( + data, + JVectorFormat.VECTOR_INDEX_CODEC_NAME, JVectorFormat.VERSION_START, - JVectorFormat.VERSION_CURRENT, - state.segmentInfo.getId(), - 
state.segmentSuffix); - readFields(meta, state.fieldInfos); - CodecUtil.checkFooter(meta); - - success = true; - } finally { - if (!success) { - IOUtils.closeWhileHandlingException(this); + JVectorFormat.VERSION_CURRENT); + CodecUtil.retrieveChecksum(data); + + this.fieldEntryMap = new HashMap<>(fieldMetaList.size()); + for (var fieldMeta : fieldMetaList) { + final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(fieldMeta.fieldNumber); + if (fieldEntryMap.containsKey(fieldInfo.name)) { + throw new CorruptIndexException("Duplicate field: " + fieldInfo.name, meta); + } + fieldEntryMap.put(fieldInfo.name, new FieldEntry(data, fieldMeta)); } } } @Override public void checkIntegrity() throws IOException { - for (FieldEntry fieldEntry : fieldEntryMap.values()) { - // Verify the vector index file - try (var indexInput = - state.directory.openInput(fieldEntry.vectorIndexFieldDataFileName, IOContext.READONCE)) { - CodecUtil.checksumEntireFile(indexInput); - } - } + CodecUtil.checksumEntireFile(data); } @Override @@ -226,47 +245,39 @@ public void close() throws IOException { IOUtils.close(fieldEntry); } fieldEntryMap.clear(); + IOUtils.close(data); } - private void readFields(ChecksumIndexInput meta, FieldInfos fieldInfos) throws IOException { - for (int fieldNumber = meta.readInt(); fieldNumber != -1; fieldNumber = meta.readInt()) { - final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber); // read field number - JVectorWriter.VectorIndexFieldMetadata vectorIndexFieldMetadata = - new JVectorWriter.VectorIndexFieldMetadata(meta); - assert fieldInfo.number == vectorIndexFieldMetadata.fieldNumber; - fieldEntryMap.put(fieldInfo.name, new FieldEntry(fieldInfo, vectorIndexFieldMetadata)); + private static JVectorWriter.VectorIndexFieldMetadata parseNextField( + IndexInput meta, FieldInfos fieldInfos) throws IOException { + final int fieldNumber = meta.readInt(); + if (fieldNumber == -1) { + return null; + } + + final FieldInfo fieldInfo = fieldInfos.fieldInfo(fieldNumber); + if (fieldInfo == null) { + throw new CorruptIndexException("Invalid field number: " + fieldNumber, meta); } + + return new JVectorWriter.VectorIndexFieldMetadata(meta); } class FieldEntry implements Closeable { private final VectorSimilarityFunction similarityFunction; - private final int vectorDimension; - private final String vectorIndexFieldDataFileName; private final GraphNodeIdToDocMap graphNodeIdToDocMap; - private final IndexInput data; private final OnDiskGraphIndex index; private final PQVectors pqVectors; // The product quantized vectors with their codebooks public FieldEntry( - FieldInfo fieldInfo, JVectorWriter.VectorIndexFieldMetadata vectorIndexFieldMetadata) + IndexInput data, JVectorWriter.VectorIndexFieldMetadata vectorIndexFieldMetadata) throws IOException { this.similarityFunction = vectorIndexFieldMetadata.vectorSimilarityFunction; - this.vectorDimension = vectorIndexFieldMetadata.vectorDimension; this.graphNodeIdToDocMap = vectorIndexFieldMetadata.graphNodeIdToDocMap; - this.vectorIndexFieldDataFileName = - baseDataFileName + "_" + fieldInfo.name + "." 
+ JVectorFormat.VECTOR_INDEX_EXTENSION; final long graphOffset = vectorIndexFieldMetadata.vectorIndexOffset; final long graphLength = vectorIndexFieldMetadata.vectorIndexLength; assert graphLength > 0 : "Read empty JVector graph"; - this.data = directory.openInput(vectorIndexFieldDataFileName, state.context); - CodecUtil.checkIndexHeader( - this.data, - JVectorFormat.VECTOR_INDEX_CODEC_NAME, - JVectorFormat.VERSION_START, - JVectorFormat.VERSION_CURRENT, - state.segmentInfo.getId(), - state.segmentSuffix); // Load the graph index from cloned slices of data (no need to close) final var indexReaderSupplier = new JVectorRandomAccessReader.Supplier(data.slice("graph", graphOffset, graphLength)); @@ -292,7 +303,7 @@ public FieldEntry( @Override public void close() throws IOException { - IOUtils.close(data); + index.close(); } } } diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 561a573d4925..9d2fa5c64cdc 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -104,8 +104,6 @@ public class JVectorWriter extends KnnVectorsWriter { private final IndexOutput meta; private final IndexOutput data; - private final String baseDataFileName; - private final SegmentWriteState segmentWriteState; private final int maxConn; private final int beamWidth; private final float degreeOverflow; @@ -129,7 +127,6 @@ public JVectorWriter( int minimumBatchSizeForQuantization, boolean hierarchyEnabled) throws IOException { - this.segmentWriteState = segmentWriteState; this.maxConn = maxConn; this.beamWidth = beamWidth; this.degreeOverflow = degreeOverflow; @@ -137,8 +134,6 @@ public JVectorWriter( this.numberOfSubspacesPerVectorSupplier = numberOfSubspacesPerVectorSupplier; this.minimumBatchSizeForQuantization = minimumBatchSizeForQuantization; this.hierarchyEnabled = hierarchyEnabled; - this.baseDataFileName = - segmentWriteState.segmentInfo.name + "_" + segmentWriteState.segmentSuffix; try { final String metaFileName = @@ -306,22 +301,8 @@ private VectorIndexFieldMetadata writeGraph( OrdinalMapper ordinalMapper, GraphNodeIdToDocMap graphNodeIdToDocMap) throws IOException { - // field data file, which contains the graph - final String vectorIndexFieldFileName = - baseDataFileName + "_" + fieldInfo.name + "." 
+ JVectorFormat.VECTOR_INDEX_EXTENSION; - - try (IndexOutput indexOutput = - segmentWriteState.directory.createOutput( - vectorIndexFieldFileName, segmentWriteState.context)) { - // Header for the field data file - CodecUtil.writeIndexHeader( - indexOutput, - JVectorFormat.VECTOR_INDEX_CODEC_NAME, - JVectorFormat.VERSION_CURRENT, - segmentWriteState.segmentInfo.getId(), - segmentWriteState.segmentSuffix); - final var jVectorIndexWriter = new JVectorIndexWriter(indexOutput); - final long startOffset = indexOutput.getFilePointer(); + try (final var jVectorIndexWriter = new JVectorIndexWriter(data)) { + final long startOffset = data.getFilePointer(); final var writerBuilder = new OnDiskSequentialGraphIndexWriter.Builder(graph, jVectorIndexWriter) .with(new InlineVectors(randomAccessVectorValues.dimension())); @@ -334,7 +315,7 @@ private VectorIndexFieldMetadata writeGraph( FeatureId.INLINE_VECTORS, nodeId -> new InlineVectors.State(randomAccessVectorValues.getVector(nodeId))); writer.write(suppliers); - final long endGraphOffset = indexOutput.getFilePointer(); + final long endGraphOffset = data.getFilePointer(); // If PQ is enabled and we have enough vectors, write the PQ codebooks and compressed // vectors @@ -344,12 +325,11 @@ private VectorIndexFieldMetadata writeGraph( pqOffset = endGraphOffset; // write the compressed vectors and codebooks to disk pqVectors.write(jVectorIndexWriter); - pqLength = indexOutput.getFilePointer() - endGraphOffset; + pqLength = data.getFilePointer() - endGraphOffset; } else { pqOffset = 0; pqLength = 0; } - CodecUtil.writeFooter(indexOutput); return new VectorIndexFieldMetadata( fieldInfo.number, From f90eacfb40210d030957c026ecf86bfad829e379 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Mon, 24 Nov 2025 20:34:05 +0000 Subject: [PATCH 73/86] fixup! 
Fix missing @Override
---
 .../lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java | 1 -
 1 file changed, 1 deletion(-)

diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java
index d7622ebc85a8..3eda89105ce3 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java
@@ -54,7 +54,6 @@ public float readFloat() throws IOException {
     return Float.intBitsToFloat(indexInputDelegate.readInt());
   }
 
-  // TODO: bring back to override when upgrading jVector again
   @Override
   public long readLong() throws IOException {
     return indexInputDelegate.readLong();

From cecc473d5721a968bed2f36c26e5fc16235dd5db Mon Sep 17 00:00:00 2001
From: Alec Bernardi
Date: Tue, 25 Nov 2025 22:24:28 +0000
Subject: [PATCH 74/86] Fix remove sorting

---
 .../org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java
index 9d2fa5c64cdc..74785900b588 100644
--- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java
+++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java
@@ -468,7 +468,7 @@ static class FieldWriter extends KnnFieldVectorsWriter {
   // The ordering of docIds matches the ordering of vectors, the index in this list corresponds to
   // the jVector ordinal
   private final List> vectors = new ArrayList<>();
-  private DocsWithFieldSet docIds;
+  private final DocsWithFieldSet docIds;
 
   FieldWriter(FieldInfo fieldInfo) {
     /** For creating a new field from a flat field vectors writer.
*/ From 1d2b4e83d8a0a5348901b9ba86a257398f7e8625 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Tue, 25 Nov 2025 22:42:43 +0000 Subject: [PATCH 75/86] Avoid extra copies in RandomAccessMergedFloatVectorValues.getVector() --- .../sandbox/codecs/jvector/JVectorWriter.java | 25 ++++++++++++------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 74785900b588..020dab7c6b61 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -686,10 +686,19 @@ public int dimension() { } @Override - public VectorFloat getVector(int nodeId) { - final var vector = VECTOR_TYPE_SUPPORT.createFloatVector(dimension); - getVectorInto(nodeId, vector, 0); - return vector; + public VectorFloat getVector(int node) { + final FloatVectorValues values = vectors[ordToReader.applyAsInt(node)]; + final int ord = ordToReaderOrd.applyAsInt(node); + + if (values instanceof JVectorFloatVectorValues jVectorValues) { + return jVectorValues.vectorFloatValue(ord); + } + + try { + return VECTOR_TYPE_SUPPORT.createFloatVector(values.vectorValue(ord)); + } catch (IOException e) { + throw new UncheckedIOException(e); + } } @Override @@ -701,16 +710,14 @@ public void getVectorInto(int node, VectorFloat destinationVector, int offset jVectorValues.getVectorInto(ord, destinationVector, offset); } - final float[] srcVector; + final VectorFloat srcVector; try { - srcVector = values.vectorValue(ord); + srcVector = VECTOR_TYPE_SUPPORT.createFloatVector(values.vectorValue(ord)); } catch (IOException e) { throw new UncheckedIOException(e); } - for (int i = 0; i < srcVector.length; ++i) { - destinationVector.set(i + offset, srcVector[i]); - } + destinationVector.copyFrom(srcVector, 0, offset, srcVector.length()); } @Override From cfbf4c2eeca554f35d72c594d7994e0d0febfc15 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Tue, 25 Nov 2025 22:55:38 +0000 Subject: [PATCH 76/86] Move PQ encoding to FieldWriter.addValue instead of flush --- .../sandbox/codecs/jvector/JVectorWriter.java | 48 ++++++++++++++++--- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 020dab7c6b61..0866e0af30ef 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -29,6 +29,7 @@ import io.github.jbellis.jvector.graph.disk.feature.FeatureId; import io.github.jbellis.jvector.graph.disk.feature.InlineVectors; import io.github.jbellis.jvector.graph.similarity.BuildScoreProvider; +import io.github.jbellis.jvector.quantization.MutablePQVectors; import io.github.jbellis.jvector.quantization.PQVectors; import io.github.jbellis.jvector.quantization.ProductQuantization; import io.github.jbellis.jvector.vector.VectorSimilarityFunction; @@ -176,7 +177,8 @@ public KnnFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOException + "This can provides much greater savings in storage and memory"; throw new UnsupportedOperationException(errorMessage); } - FieldWriter newField = new FieldWriter(fieldInfo); + final int M = 
numberOfSubspacesPerVectorSupplier.applyAsInt(fieldInfo.getVectorDimension()); + final FieldWriter newField = new FieldWriter(fieldInfo, minimumBatchSizeForQuantization, M); fields.add(newField); return newField; @@ -216,17 +218,15 @@ public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { } final RandomAccessVectorValues randomAccessVectorValues = field.toRandomAccessVectorValues(); final BuildScoreProvider buildScoreProvider; - final PQVectors pqVectors; + final PQVectors pqVectors = field.getCompressedVectors(); final FieldInfo fieldInfo = field.fieldInfo; - if (randomAccessVectorValues.size() >= minimumBatchSizeForQuantization) { - pqVectors = getPQVectors(randomAccessVectorValues, fieldInfo); + if (pqVectors != null) { buildScoreProvider = BuildScoreProvider.pqBuildScoreProvider( JVectorFormat.toJVectorSimilarity(fieldInfo.getVectorSimilarityFunction()), pqVectors); } else { // Not enough vectors for quantization; use full precision vectors instead - pqVectors = null; buildScoreProvider = BuildScoreProvider.randomAccessScoreProvider( randomAccessVectorValues, @@ -470,10 +470,18 @@ static class FieldWriter extends KnnFieldVectorsWriter { private final List> vectors = new ArrayList<>(); private final DocsWithFieldSet docIds; - FieldWriter(FieldInfo fieldInfo) { + // PQ fields + private final int pqThreshold; + private final int pqSubspaceCount; + private MutablePQVectors pqVectors; + + FieldWriter(FieldInfo fieldInfo, int pqThreshold, int pqSubspaceCount) { /** For creating a new field from a flat field vectors writer. */ this.fieldInfo = fieldInfo; this.docIds = new DocsWithFieldSet(); + this.pqThreshold = pqThreshold; + this.pqSubspaceCount = pqSubspaceCount; + this.pqVectors = null; } @Override @@ -485,7 +493,29 @@ public void addValue(int docID, float[] vectorValue) throws IOException { + "\" appears more than once in this document (only one value is allowed per field)"); } docIds.add(docID); - vectors.add(VECTOR_TYPE_SUPPORT.createFloatVector(copyValue(vectorValue))); + final var vector = VECTOR_TYPE_SUPPORT.createFloatVector(copyValue(vectorValue)); + vectors.add(vector); + + if (pqVectors != null) { + pqVectors.encodeAndSet(vectors.size() - 1, vector); + } else if (vectors.size() > pqThreshold) { + final boolean globallyCenter = + switch (fieldInfo.getVectorSimilarityFunction()) { + case EUCLIDEAN -> true; + case COSINE, DOT_PRODUCT, MAXIMUM_INNER_PRODUCT -> false; + }; + final int pqCenterCount = Math.min(256, vectors.size()); + final var pq = + ProductQuantization.compute( + toRandomAccessVectorValues(), + pqSubspaceCount, + pqCenterCount, + globallyCenter); + pqVectors = new MutablePQVectors(pq); + for (int i = 0; i < vectors.size(); ++i) { + pqVectors.encodeAndSet(i, vectors.get(i)); + } + } } @Override @@ -497,6 +527,10 @@ public RandomAccessVectorValues toRandomAccessVectorValues() { return new ListRandomAccessVectorValues(vectors, fieldInfo.getVectorDimension()); } + public PQVectors getCompressedVectors() { + return pqVectors; + } + @Override public long ramBytesUsed() { return SHALLOW_SIZE From dc3dba6090aa95635df0df0892af84ba042a4f77 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Thu, 4 Dec 2025 14:52:46 +0000 Subject: [PATCH 77/86] Use bulk read methods where possible (requires JVector byte-order pull 577) --- .../sandbox/codecs/jvector/JVectorIndexWriter.java | 10 ++++++++++ .../codecs/jvector/JVectorRandomAccessReader.java | 14 +++----------- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git 
a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java index e4a03571f9f3..5cbfece4c0e1 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorIndexWriter.java @@ -19,6 +19,9 @@ import io.github.jbellis.jvector.disk.IndexWriter; import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + import org.apache.lucene.store.IndexOutput; /** @@ -97,6 +100,13 @@ public void writeFloat(float v) throws IOException { indexOutputDelegate.writeInt(Float.floatToIntBits(v)); } + @Override + public void writeFloats(float[] floats, int offset, int count) throws IOException { + final ByteBuffer buf = ByteBuffer.allocate(count * Float.BYTES); + buf.order(ByteOrder.LITTLE_ENDIAN).asFloatBuffer().put(floats, offset, count); + write(buf.array()); + } + @Override public void writeDouble(double v) throws IOException { writeLong(Double.doubleToLongBits(v)); diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java index 3eda89105ce3..5374b822795e 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorRandomAccessReader.java @@ -22,7 +22,6 @@ import java.io.EOFException; import java.io.IOException; import java.nio.ByteBuffer; -import java.nio.FloatBuffer; import org.apache.lucene.store.IndexInput; /// Implements JVector reader capabilities over a Lucene IndexInput @@ -92,24 +91,17 @@ public void readFully(ByteBuffer buffer) throws IOException { @Override public void readFully(long[] vector) throws IOException { - for (int i = 0; i < vector.length; i++) { - vector[i] = readLong(); - } + indexInputDelegate.readLongs(vector, 0, vector.length); } @Override public void read(int[] ints, int offset, int count) throws IOException { - for (int i = 0; i < count; i++) { - ints[offset + i] = readInt(); - } + indexInputDelegate.readInts(ints, offset, count); } @Override public void read(float[] floats, int offset, int count) throws IOException { - final ByteBuffer byteBuffer = ByteBuffer.allocate(Float.BYTES * count); - indexInputDelegate.readBytes(byteBuffer.array(), offset, Float.BYTES * count); - FloatBuffer buffer = byteBuffer.asFloatBuffer(); - buffer.get(floats, offset, count); + indexInputDelegate.readFloats(floats, offset, count); } @Override From 9ef6dd913529f76b2b91ae8bfe352d5999214a16 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Fri, 5 Dec 2025 16:30:04 +0000 Subject: [PATCH 78/86] Move BuildScoreProvider to FieldWriter --- .../sandbox/codecs/jvector/JVectorWriter.java | 72 +++++++++++++++---- 1 file changed, 58 insertions(+), 14 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 0866e0af30ef..7f659dc576bf 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -29,6 +29,7 @@ import io.github.jbellis.jvector.graph.disk.feature.FeatureId; import 
io.github.jbellis.jvector.graph.disk.feature.InlineVectors; import io.github.jbellis.jvector.graph.similarity.BuildScoreProvider; +import io.github.jbellis.jvector.graph.similarity.SearchScoreProvider; import io.github.jbellis.jvector.quantization.MutablePQVectors; import io.github.jbellis.jvector.quantization.PQVectors; import io.github.jbellis.jvector.quantization.ProductQuantization; @@ -42,6 +43,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import java.util.Objects; import java.util.concurrent.CompletableFuture; import java.util.concurrent.Executor; import java.util.function.IntUnaryOperator; @@ -217,21 +219,9 @@ public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { ordinalMapper = null; } final RandomAccessVectorValues randomAccessVectorValues = field.toRandomAccessVectorValues(); - final BuildScoreProvider buildScoreProvider; + final BuildScoreProvider buildScoreProvider = field.buildScoreProvider; final PQVectors pqVectors = field.getCompressedVectors(); final FieldInfo fieldInfo = field.fieldInfo; - if (pqVectors != null) { - buildScoreProvider = - BuildScoreProvider.pqBuildScoreProvider( - JVectorFormat.toJVectorSimilarity(fieldInfo.getVectorSimilarityFunction()), - pqVectors); - } else { - // Not enough vectors for quantization; use full precision vectors instead - buildScoreProvider = - BuildScoreProvider.randomAccessScoreProvider( - randomAccessVectorValues, - JVectorFormat.toJVectorSimilarity(fieldInfo.getVectorSimilarityFunction())); - } final GraphNodeIdToDocMap graphNodeIdToDocMap = new GraphNodeIdToDocMap(newDocIds); final var graph = @@ -467,9 +457,11 @@ static class FieldWriter extends KnnFieldVectorsWriter { private final FieldInfo fieldInfo; // The ordering of docIds matches the ordering of vectors, the index in this list corresponds to // the jVector ordinal - private final List> vectors = new ArrayList<>(); + private final List> vectors; private final DocsWithFieldSet docIds; + private final DelegatingBuildScoreProvider buildScoreProvider; + // PQ fields private final int pqThreshold; private final int pqSubspaceCount; @@ -478,7 +470,17 @@ static class FieldWriter extends KnnFieldVectorsWriter { FieldWriter(FieldInfo fieldInfo, int pqThreshold, int pqSubspaceCount) { /** For creating a new field from a flat field vectors writer. 
*/ this.fieldInfo = fieldInfo; + this.vectors = new ArrayList<>(); this.docIds = new DocsWithFieldSet(); + + final var similarityFunction = + JVectorFormat.toJVectorSimilarity(fieldInfo.getVectorSimilarityFunction()); + this.buildScoreProvider = + new DelegatingBuildScoreProvider( + BuildScoreProvider.randomAccessScoreProvider( + toRandomAccessVectorValues(), + similarityFunction)); + this.pqThreshold = pqThreshold; this.pqSubspaceCount = pqSubspaceCount; this.pqVectors = null; @@ -515,6 +517,11 @@ public void addValue(int docID, float[] vectorValue) throws IOException { for (int i = 0; i < vectors.size(); ++i) { pqVectors.encodeAndSet(i, vectors.get(i)); } + + final var similarityFunction = + JVectorFormat.toJVectorSimilarity(fieldInfo.getVectorSimilarityFunction()); + buildScoreProvider.setDelegate( + BuildScoreProvider.pqBuildScoreProvider(similarityFunction, pqVectors)); } } @@ -539,6 +546,43 @@ public long ramBytesUsed() { } } + static final class DelegatingBuildScoreProvider implements BuildScoreProvider { + BuildScoreProvider delegate; + + DelegatingBuildScoreProvider(BuildScoreProvider delegate) { + this.delegate = Objects.requireNonNull(delegate); + } + + public void setDelegate(BuildScoreProvider delegate) { + this.delegate = Objects.requireNonNull(delegate); + } + + @Override + public boolean isExact() { + return delegate.isExact(); + } + + @Override + public VectorFloat approximateCentroid() { + return delegate.approximateCentroid(); + } + + @Override + public SearchScoreProvider searchProviderFor(VectorFloat vector) { + return delegate.searchProviderFor(vector); + } + + @Override + public SearchScoreProvider searchProviderFor(int node1) { + return delegate.searchProviderFor(node1); + } + + @Override + public SearchScoreProvider diversityProviderFor(int node1) { + return delegate.diversityProviderFor(node1); + } + } + private void mergeAndWriteField(FieldInfo fieldInfo, MergeState mergeState) throws IOException { assert fieldInfo.hasVectorValues(); final int dimension = fieldInfo.getVectorDimension(); From 4b25b4bf09e1864481c972c89851a85bbe828b93 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Fri, 5 Dec 2025 16:56:32 +0000 Subject: [PATCH 79/86] Use ImmutableGraphIndex for writeField --- .../apache/lucene/sandbox/codecs/jvector/JVectorWriter.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 7f659dc576bf..ecb479118f3e 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -20,6 +20,7 @@ import static org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsReader.readVectorEncoding; import io.github.jbellis.jvector.graph.GraphIndexBuilder; +import io.github.jbellis.jvector.graph.ImmutableGraphIndex; import io.github.jbellis.jvector.graph.ListRandomAccessVectorValues; import io.github.jbellis.jvector.graph.OnHeapGraphIndex; import io.github.jbellis.jvector.graph.RandomAccessVectorValues; @@ -260,7 +261,7 @@ private void writeField( PQVectors pqVectors, OrdinalMapper ordinalMapper, GraphNodeIdToDocMap graphNodeIdToDocMap, - OnHeapGraphIndex graph) + ImmutableGraphIndex graph) throws IOException { final var vectorIndexFieldMetadata = writeGraph( @@ -284,7 +285,7 @@ private void writeField( * @throws IOException IOException */ private 
VectorIndexFieldMetadata writeGraph( - OnHeapGraphIndex graph, + ImmutableGraphIndex graph, RandomAccessVectorValues randomAccessVectorValues, FieldInfo fieldInfo, PQVectors pqVectors, From 54959f50d4c3430bbca5c44bf8c78b5ef38055dd Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Fri, 5 Dec 2025 16:59:00 +0000 Subject: [PATCH 80/86] Build graph while adding docs --- .../sandbox/codecs/jvector/JVectorWriter.java | 48 +++++++++++++++---- 1 file changed, 40 insertions(+), 8 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index ecb479118f3e..ec6543555dfb 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -181,7 +181,16 @@ public KnnFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOException throw new UnsupportedOperationException(errorMessage); } final int M = numberOfSubspacesPerVectorSupplier.applyAsInt(fieldInfo.getVectorDimension()); - final FieldWriter newField = new FieldWriter(fieldInfo, minimumBatchSizeForQuantization, M); + final FieldWriter newField = + new FieldWriter( + fieldInfo, + maxConn, + beamWidth, + degreeOverflow, + alpha, + hierarchyEnabled, + minimumBatchSizeForQuantization, + M); fields.add(newField); return newField; @@ -220,13 +229,9 @@ public void flush(int maxDoc, Sorter.DocMap sortMap) throws IOException { ordinalMapper = null; } final RandomAccessVectorValues randomAccessVectorValues = field.toRandomAccessVectorValues(); - final BuildScoreProvider buildScoreProvider = field.buildScoreProvider; final PQVectors pqVectors = field.getCompressedVectors(); - final FieldInfo fieldInfo = field.fieldInfo; - + final ImmutableGraphIndex graph = field.getGraphIndex(); final GraphNodeIdToDocMap graphNodeIdToDocMap = new GraphNodeIdToDocMap(newDocIds); - final var graph = - getGraph(buildScoreProvider, randomAccessVectorValues, fieldInfo, Runnable::run); writeField( field.fieldInfo, randomAccessVectorValues, @@ -461,6 +466,7 @@ static class FieldWriter extends KnnFieldVectorsWriter { private final List> vectors; private final DocsWithFieldSet docIds; + private GraphIndexBuilder indexBuilder; private final DelegatingBuildScoreProvider buildScoreProvider; // PQ fields @@ -468,7 +474,15 @@ static class FieldWriter extends KnnFieldVectorsWriter { private final int pqSubspaceCount; private MutablePQVectors pqVectors; - FieldWriter(FieldInfo fieldInfo, int pqThreshold, int pqSubspaceCount) { + FieldWriter( + FieldInfo fieldInfo, + int maxConn, + int beamWidth, + float degreeOverflow, + float alpha, + boolean hierarchyEnabled, + int pqThreshold, + int pqSubspaceCount) { /** For creating a new field from a flat field vectors writer. 
*/ this.fieldInfo = fieldInfo; this.vectors = new ArrayList<>(); @@ -481,6 +495,15 @@ static class FieldWriter extends KnnFieldVectorsWriter { BuildScoreProvider.randomAccessScoreProvider( toRandomAccessVectorValues(), similarityFunction)); + this.indexBuilder = + new GraphIndexBuilder( + buildScoreProvider, + fieldInfo.getVectorDimension(), + maxConn, + beamWidth, + degreeOverflow, + alpha, + hierarchyEnabled); this.pqThreshold = pqThreshold; this.pqSubspaceCount = pqSubspaceCount; @@ -495,12 +518,13 @@ public void addValue(int docID, float[] vectorValue) throws IOException { + fieldInfo.name + "\" appears more than once in this document (only one value is allowed per field)"); } + final int ord = vectors.size(); docIds.add(docID); final var vector = VECTOR_TYPE_SUPPORT.createFloatVector(copyValue(vectorValue)); vectors.add(vector); if (pqVectors != null) { - pqVectors.encodeAndSet(vectors.size() - 1, vector); + pqVectors.encodeAndSet(ord, vector); } else if (vectors.size() > pqThreshold) { final boolean globallyCenter = switch (fieldInfo.getVectorSimilarityFunction()) { @@ -523,7 +547,10 @@ public void addValue(int docID, float[] vectorValue) throws IOException { JVectorFormat.toJVectorSimilarity(fieldInfo.getVectorSimilarityFunction()); buildScoreProvider.setDelegate( BuildScoreProvider.pqBuildScoreProvider(similarityFunction, pqVectors)); + indexBuilder = GraphIndexBuilder.rescore(indexBuilder, buildScoreProvider); } + + indexBuilder.addGraphNode(ord, buildScoreProvider.searchProviderFor(vector)); } @Override @@ -539,6 +566,11 @@ public PQVectors getCompressedVectors() { return pqVectors; } + public ImmutableGraphIndex getGraphIndex() { + indexBuilder.cleanup(); + return indexBuilder.getGraph(); + } + @Override public long ramBytesUsed() { return SHALLOW_SIZE From ab9e92a5d7ef0aade7af0eedf3aa41f7d7eb247b Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Fri, 5 Dec 2025 19:50:11 +0000 Subject: [PATCH 81/86] Support maxDegrees per-layer --- .../sandbox/codecs/jvector/JVectorFormat.java | 27 ++++++++++++++++--- .../sandbox/codecs/jvector/JVectorWriter.java | 20 +++++++------- 2 files changed, 35 insertions(+), 12 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java index 25b0b3da6d5c..07a4f31f6bad 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorFormat.java @@ -18,6 +18,7 @@ package org.apache.lucene.sandbox.codecs.jvector; import java.io.IOException; +import java.util.List; import java.util.function.IntUnaryOperator; import org.apache.lucene.codecs.KnnVectorsFormat; import org.apache.lucene.codecs.KnnVectorsReader; @@ -43,7 +44,7 @@ public class JVectorFormat extends KnnVectorsFormat { public static final float DEFAULT_ALPHA = 2f; public static final boolean DEFAULT_HIERARCHY_ENABLED = true; - private final int maxConn; + private final List maxDegrees; private final int beamWidth; // As a function of the original dimension private final IntUnaryOperator numberOfSubspacesPerVectorSupplier; @@ -104,8 +105,28 @@ public JVectorFormat( IntUnaryOperator numberOfSubspacesPerVectorSupplier, int minBatchSizeForQuantization, boolean hierarchyEnabled) { + this( + name, + hierarchyEnabled ? 
List.of(maxConn * 2, maxConn) : List.of(maxConn), + beamWidth, + neighborOverflow, + alpha, + numberOfSubspacesPerVectorSupplier, + minBatchSizeForQuantization, + hierarchyEnabled); + } + + public JVectorFormat( + String name, + List maxDegrees, + int beamWidth, + float neighborOverflow, + float alpha, + IntUnaryOperator numberOfSubspacesPerVectorSupplier, + int minBatchSizeForQuantization, + boolean hierarchyEnabled) { super(name); - this.maxConn = maxConn; + this.maxDegrees = maxDegrees; this.beamWidth = beamWidth; this.numberOfSubspacesPerVectorSupplier = numberOfSubspacesPerVectorSupplier; this.minBatchSizeForQuantization = minBatchSizeForQuantization; @@ -118,7 +139,7 @@ public JVectorFormat( public KnnVectorsWriter fieldsWriter(SegmentWriteState state) throws IOException { return new JVectorWriter( state, - maxConn, + maxDegrees, beamWidth, neighborOverflow, alpha, diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index ec6543555dfb..a66ca69da824 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -108,7 +108,7 @@ public class JVectorWriter extends KnnVectorsWriter { private final IndexOutput meta; private final IndexOutput data; - private final int maxConn; + private final List maxDegrees; private final int beamWidth; private final float degreeOverflow; private final float alpha; @@ -123,7 +123,7 @@ public class JVectorWriter extends KnnVectorsWriter { public JVectorWriter( SegmentWriteState segmentWriteState, - int maxConn, + List maxDegrees, int beamWidth, float degreeOverflow, float alpha, @@ -131,7 +131,7 @@ public JVectorWriter( int minimumBatchSizeForQuantization, boolean hierarchyEnabled) throws IOException { - this.maxConn = maxConn; + this.maxDegrees = maxDegrees; this.beamWidth = beamWidth; this.degreeOverflow = degreeOverflow; this.alpha = alpha; @@ -184,7 +184,7 @@ public KnnFieldVectorsWriter addField(FieldInfo fieldInfo) throws IOException final FieldWriter newField = new FieldWriter( fieldInfo, - maxConn, + maxDegrees, beamWidth, degreeOverflow, alpha, @@ -476,7 +476,7 @@ static class FieldWriter extends KnnFieldVectorsWriter { FieldWriter( FieldInfo fieldInfo, - int maxConn, + List maxDegrees, int beamWidth, float degreeOverflow, float alpha, @@ -499,11 +499,12 @@ static class FieldWriter extends KnnFieldVectorsWriter { new GraphIndexBuilder( buildScoreProvider, fieldInfo.getVectorDimension(), - maxConn, + maxDegrees, beamWidth, degreeOverflow, alpha, - hierarchyEnabled); + hierarchyEnabled, + true); this.pqThreshold = pqThreshold; this.pqSubspaceCount = pqSubspaceCount; @@ -858,11 +859,12 @@ public OnHeapGraphIndex getGraph( new GraphIndexBuilder( buildScoreProvider, fieldInfo.getVectorDimension(), - maxConn, + maxDegrees, beamWidth, degreeOverflow, alpha, - hierarchyEnabled); + hierarchyEnabled, + true); /* * We cannot always use randomAccessVectorValues for the graph building From 528b6d9d60d43c2ecda6101b924ab822eda0dcfd Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Mon, 8 Dec 2025 20:38:52 +0000 Subject: [PATCH 82/86] Start largestQuantizedReaderIndex at -1 --- .../lucene/sandbox/codecs/jvector/JVectorWriter.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java 
b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index a66ca69da824..eee7ef0d1c6e 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -678,11 +678,15 @@ private void mergeAndWriteField(FieldInfo fieldInfo, MergeState mergeState) thro i -> ordToReaderOrd[i]); // Find the largest quantized reader to re-use its PQ codebook, if possible - int largestQuantizedReaderIndex = 0; + int largestQuantizedReaderIndex = -1; ProductQuantization pq = null; for (int i = 0; i < liveDocCounts.length; ++i) { - if (liveDocCounts[i] > liveDocCounts[largestQuantizedReaderIndex]) { - if (mergeState.knnVectorsReaders[i] instanceof JVectorReader jVectorReader) { + if (liveDocCounts[i] == 0) { + continue; + } + final var knnReader = mergeState.knnVectorsReaders[i].unwrapReaderForField(fieldInfo.name); + if (knnReader instanceof JVectorReader jVectorReader) { + if (pq == null || liveDocCounts[i] > liveDocCounts[largestQuantizedReaderIndex]) { final var maybeNewPq = jVectorReader.getProductQuantizationForField(fieldInfo.name); if (maybeNewPq.isPresent()) { largestQuantizedReaderIndex = i; From fe3c7832cd6bb731c1b2b4859f48b4107255be14 Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Mon, 8 Dec 2025 20:40:13 +0000 Subject: [PATCH 83/86] fixup! Move PQ encoding to FieldWriter.addValue instead of flush --- .../sandbox/codecs/jvector/JVectorWriter.java | 59 ++++++++----------- 1 file changed, 26 insertions(+), 33 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index eee7ef0d1c6e..8c0c264030fc 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -342,26 +342,6 @@ private VectorIndexFieldMetadata writeGraph( } } - private PQVectors getPQVectors( - RandomAccessVectorValues randomAccessVectorValues, FieldInfo fieldInfo) throws IOException { - final boolean globallyCenter = - switch (fieldInfo.getVectorSimilarityFunction()) { - case EUCLIDEAN -> true; - case COSINE, DOT_PRODUCT, MAXIMUM_INNER_PRODUCT -> false; - }; - final int M = - numberOfSubspacesPerVectorSupplier.applyAsInt(randomAccessVectorValues.dimension()); - final var numberOfClustersPerSubspace = - Math.min(256, randomAccessVectorValues.size()); // number of centroids per - // subspace - - ProductQuantization pq = - ProductQuantization.compute( - randomAccessVectorValues, M, numberOfClustersPerSubspace, globallyCenter); - - return (PQVectors) pq.encodeAll(randomAccessVectorValues); - } - /// Metadata about the index to be persisted on disk public static class VectorIndexFieldMetadata { final int fieldNumber; @@ -527,18 +507,11 @@ public void addValue(int docID, float[] vectorValue) throws IOException { if (pqVectors != null) { pqVectors.encodeAndSet(ord, vector); } else if (vectors.size() > pqThreshold) { - final boolean globallyCenter = - switch (fieldInfo.getVectorSimilarityFunction()) { - case EUCLIDEAN -> true; - case COSINE, DOT_PRODUCT, MAXIMUM_INNER_PRODUCT -> false; - }; - final int pqCenterCount = Math.min(256, vectors.size()); - final var pq = - ProductQuantization.compute( + final ProductQuantization pq = + trainPQ( toRandomAccessVectorValues(), pqSubspaceCount, - pqCenterCount, - globallyCenter); + 
fieldInfo.getVectorSimilarityFunction()); pqVectors = new MutablePQVectors(pq); for (int i = 0; i < vectors.size(); ++i) { pqVectors.encodeAndSet(i, vectors.get(i)); @@ -551,7 +524,7 @@ public void addValue(int docID, float[] vectorValue) throws IOException { indexBuilder = GraphIndexBuilder.rescore(indexBuilder, buildScoreProvider); } - indexBuilder.addGraphNode(ord, buildScoreProvider.searchProviderFor(vector)); + indexBuilder.addGraphNode(ord, vector); } @Override @@ -713,8 +686,9 @@ private void mergeAndWriteField(FieldInfo fieldInfo, MergeState mergeState) thro } pqVectors = (PQVectors) newPq.encodeAll(ravv); } else if (ravv.size() >= minimumBatchSizeForQuantization) { - // No pre-existing codebooks, check if we have enough vectors to trigger quantization - pqVectors = getPQVectors(ravv, fieldInfo); + final int M = numberOfSubspacesPerVectorSupplier.applyAsInt(ravv.dimension()); + final ProductQuantization newPQ = trainPQ(ravv, M, fieldInfo.getVectorSimilarityFunction()); + pqVectors = newPQ.encodeAll(ravv, SIMD_POOL); } else { pqVectors = null; } @@ -894,6 +868,25 @@ public OnHeapGraphIndex getGraph( return graphIndex; } + private static ProductQuantization trainPQ( + RandomAccessVectorValues vectors, + int M, + org.apache.lucene.index.VectorSimilarityFunction similarityFunction) { + final boolean globallyCenter = + switch (similarityFunction) { + case EUCLIDEAN -> true; + case COSINE, DOT_PRODUCT, MAXIMUM_INNER_PRODUCT -> false; + }; + final int numberOfClustersPerSubspace = Math.min(256, vectors.size()); + // This extracts a random minimal subset of the vectors for training the PQ codebooks + return + ProductQuantization.compute( + vectors, + M, + numberOfClustersPerSubspace, + globallyCenter); + } + static class RandomAccessVectorValuesOverVectorValues implements RandomAccessVectorValues { private final FloatVectorValues values; From 029a116fda0bcd11100f6586dec7539d67e567bf Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Mon, 8 Dec 2025 21:45:10 +0000 Subject: [PATCH 84/86] Don't re-use PQ codebooks --- .../sandbox/codecs/jvector/JVectorWriter.java | 38 +------------------ 1 file changed, 1 insertion(+), 37 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 8c0c264030fc..5cc8452abeb5 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -618,7 +618,6 @@ private void mergeAndWriteField(FieldInfo fieldInfo, MergeState mergeState) thro // These arrays may be larger than strictly necessary if there are deleted docs/missing fields final int totalMaxDocs = Arrays.stream(mergeState.maxDocs).reduce(0, Math::addExact); - final int[] liveDocCounts = new int[mergeCount]; final DocsWithFieldSet docIds = new DocsWithFieldSet(); final int[] ordToReaderIndex = new int[totalMaxDocs]; final int[] ordToReaderOrd = new int[totalMaxDocs]; @@ -627,8 +626,6 @@ private void mergeAndWriteField(FieldInfo fieldInfo, MergeState mergeState) thro int ord = 0; final var docIdMerger = DocIDMerger.of(subs, mergeState.needsIndexSort); for (var sub = docIdMerger.next(); sub != null; sub = docIdMerger.next()) { - final int readerIndex = sub.readerIndex; - liveDocCounts[readerIndex] += 1; docIds.add(sub.mappedDocID); ordToReaderIndex[ord] = sub.readerIndex; ordToReaderOrd[ord] = sub.index(); @@ -650,42 +647,9 @@ private void 
mergeAndWriteField(FieldInfo fieldInfo, MergeState mergeState) thro i -> ordToReaderIndex[i], i -> ordToReaderOrd[i]); - // Find the largest quantized reader to re-use its PQ codebook, if possible - int largestQuantizedReaderIndex = -1; - ProductQuantization pq = null; - for (int i = 0; i < liveDocCounts.length; ++i) { - if (liveDocCounts[i] == 0) { - continue; - } - final var knnReader = mergeState.knnVectorsReaders[i].unwrapReaderForField(fieldInfo.name); - if (knnReader instanceof JVectorReader jVectorReader) { - if (pq == null || liveDocCounts[i] > liveDocCounts[largestQuantizedReaderIndex]) { - final var maybeNewPq = jVectorReader.getProductQuantizationForField(fieldInfo.name); - if (maybeNewPq.isPresent()) { - largestQuantizedReaderIndex = i; - pq = maybeNewPq.get(); - } - } - } - } - // Perform PQ if applicable final PQVectors pqVectors; - if (pq != null) { - // Refine the leadingCompressor with the remaining vectors in the merge - ProductQuantization newPq = pq; - for (int i = 0; i < mergeCount; i++) { - if (i == largestQuantizedReaderIndex || vectors[i] == null) { - // Skip the reader associated with the re-used PQ codebook - continue; - } - final FloatVectorValues values = vectors[i]; - final RandomAccessVectorValues randomAccessVectorValues = - new RandomAccessVectorValuesOverVectorValues(values); - newPq = newPq.refine(randomAccessVectorValues); - } - pqVectors = (PQVectors) newPq.encodeAll(ravv); - } else if (ravv.size() >= minimumBatchSizeForQuantization) { + if (ravv.size() >= minimumBatchSizeForQuantization) { final int M = numberOfSubspacesPerVectorSupplier.applyAsInt(ravv.dimension()); final ProductQuantization newPQ = trainPQ(ravv, M, fieldInfo.getVectorSimilarityFunction()); pqVectors = newPQ.encodeAll(ravv, SIMD_POOL); From 5976164a8bea9f9c3e836af17b5cb2495e35dbab Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Mon, 8 Dec 2025 21:49:53 +0000 Subject: [PATCH 85/86] fixup! fixup! 
Move PQ encoding to FieldWriter.addValue instead of flush --- .../org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 5cc8452abeb5..9b0e14962ac3 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -652,7 +652,7 @@ private void mergeAndWriteField(FieldInfo fieldInfo, MergeState mergeState) thro if (ravv.size() >= minimumBatchSizeForQuantization) { final int M = numberOfSubspacesPerVectorSupplier.applyAsInt(ravv.dimension()); final ProductQuantization newPQ = trainPQ(ravv, M, fieldInfo.getVectorSimilarityFunction()); - pqVectors = newPQ.encodeAll(ravv, SIMD_POOL); + pqVectors = (PQVectors) newPQ.encodeAll(ravv); } else { pqVectors = null; } From d2738f6a86a0bf93587bdd62b048a80f59145a3a Mon Sep 17 00:00:00 2001 From: Alec Bernardi Date: Mon, 8 Dec 2025 23:11:57 +0000 Subject: [PATCH 86/86] Small re-organize PQ merge --- .../sandbox/codecs/jvector/JVectorWriter.java | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java index 9b0e14962ac3..fb9b43f347a1 100644 --- a/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java +++ b/lucene/sandbox/src/java/org/apache/lucene/sandbox/codecs/jvector/JVectorWriter.java @@ -647,28 +647,25 @@ private void mergeAndWriteField(FieldInfo fieldInfo, MergeState mergeState) thro i -> ordToReaderIndex[i], i -> ordToReaderOrd[i]); + final BuildScoreProvider buildScoreProvider; + final var similarityFunction = + JVectorFormat.toJVectorSimilarity(fieldInfo.getVectorSimilarityFunction()); + // Perform PQ if applicable final PQVectors pqVectors; if (ravv.size() >= minimumBatchSizeForQuantization) { final int M = numberOfSubspacesPerVectorSupplier.applyAsInt(ravv.dimension()); final ProductQuantization newPQ = trainPQ(ravv, M, fieldInfo.getVectorSimilarityFunction()); pqVectors = (PQVectors) newPQ.encodeAll(ravv); - } else { - pqVectors = null; - } - - final BuildScoreProvider buildScoreProvider; - final var similarityFunction = - JVectorFormat.toJVectorSimilarity(fieldInfo.getVectorSimilarityFunction()); - if (pqVectors != null) { - // Re-use PQ codebooks to build a new graph from scratch buildScoreProvider = BuildScoreProvider.pqBuildScoreProvider(similarityFunction, pqVectors); // Pre-init the diversity provider here to avoid doing it lazily (as it could block the SIMD // threads) buildScoreProvider.diversityProviderFor(0); } else { + pqVectors = null; buildScoreProvider = BuildScoreProvider.randomAccessScoreProvider(ravv, similarityFunction); } + final var graphNodeIdToDocMap = new GraphNodeIdToDocMap(docIds); final var graph = getGraph(buildScoreProvider, ravv, fieldInfo, mergeState.intraMergeTaskExecutor);