diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
index a84aece..ebd14eb 100644
--- a/.github/workflows/ci.yml
+++ b/.github/workflows/ci.yml
@@ -23,6 +23,8 @@ jobs:
         run: cargo fmt --all --check
       - name: Rust Lint - Clippy
         run: cargo clippy --all-features --all-targets
+      - name: Rust Lint - Docs
+        run: cargo doc --no-deps --all-features --document-private-items
       - name: Rust Test
         run: cargo test --workspace --all-features
 
diff --git a/Cargo.toml b/Cargo.toml
index 487d1bb..45143c3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -19,6 +19,10 @@ name = "onpair"
 warnings = "deny"
 missing_docs = "deny"
 
+[lints.rustdoc]
+broken_intra_doc_links = "deny"
+private_intra_doc_links = "deny"
+
 [lints.clippy]
 all = { level = "deny", priority = -1 }
 if_then_some_else_none = { level = "deny" }
diff --git a/README.md b/README.md
index f24e1ad..a91e762 100644
--- a/README.md
+++ b/README.md
@@ -3,10 +3,14 @@
 OnPair is a dictionary-based string compression algorithm designed for on-disk and in-memory database workloads that need both strong compression ratios and fast random access to individual values. 
 It builds its dictionary in a single sequential pass by incrementally merging frequent adjacent substrings, achieving compression comparable to BPE while being substantially faster and more memory-efficient. 
 
-## Format
+## Interchange format
 
-The binary layout of a compressed column — dictionary bytes, dictionary
-offsets, and codes — is specified in [docs/binary-format.md](docs/binary-format.md).
+OnPair defines a shared in-memory representation — the *plain interchange
+form* — that independent implementations exchange, so a column produced by
+one is readable by another. It fixes the buffers (dictionary bytes, dictionary
+offsets, codes, and row offsets) and their invariants; denser internal
+encodings and on-disk serialization are out of scope. See
+[docs/interchange-format.md](docs/interchange-format.md).
 
 ## References
 
diff --git a/benches/tpch.rs b/benches/tpch.rs
index 22a5660..1b33dbb 100644
--- a/benches/tpch.rs
+++ b/benches/tpch.rs
@@ -19,7 +19,7 @@
 //
 // Run with: cargo bench --bench tpch
 //
-// Targets the slim public API in PUBLIC_API.md
+// Targets the slim public API
 // (`compress` / `decompress` free fns + `Column::as_parts()`).
 
 use std::collections::HashMap;
diff --git a/benchmarks/onpair-bench/README.md b/benchmarks/onpair-bench/README.md
index 2425fc9..64ce7c5 100644
--- a/benchmarks/onpair-bench/README.md
+++ b/benchmarks/onpair-bench/README.md
@@ -24,7 +24,7 @@ The bench is a uv workspace member of the repo-root `pyproject.toml`. Sync
 once from the repo root, then drive it with `uv run`:
 
 ```bash
-# from /Users/joeisaacs/git/spiraldb/onpair (one-time):
+# from the repo root (one-time):
 uv sync
 
 # drop a corpus in:
diff --git a/docs/binary-format.md b/docs/interchange-format.md
similarity index 99%
rename from docs/binary-format.md
rename to docs/interchange-format.md
index 7fbfd2d..d649e4f 100644
--- a/docs/binary-format.md
+++ b/docs/interchange-format.md
@@ -347,7 +347,8 @@ A column is conformant if and only if all of the following hold.
 - Every token length `o_{i+1} - o_i` is in `1 ..= MAX_TOKEN_SIZE`.
 - All 256 single-byte tokens are present (completeness, §3).
 - No two tokens are equal (uniqueness, §3).
-- `dict_bytes_len >= o_N + MAX_TOKEN_SIZE` (the read-padding bound, §3.1).
+- `dict_bytes_len >= o_{N-1} + MAX_TOKEN_SIZE` (the read-padding bound, §3.1;
+  `o_{N-1}` is the offset of the last token).
 - `is_sorted` is `0` or `1`; if `1`, tokens are strictly increasing in
   bytewise-lexicographic order.
 
diff --git a/src/column.rs b/src/column.rs
index 72a7274..1030c7c 100644
--- a/src/column.rs
+++ b/src/column.rs
@@ -28,13 +28,14 @@ pub struct Column<O: Offset> {
     pub codes: Vec<u16>,
     /// `R + 1` offsets into `codes` delimiting the `R` input rows: row `r`'s
     /// codes are `codes[code_offsets[r]..code_offsets[r + 1]]`. The compressor
-    /// emits these because a token may span a row boundary, so the row
-    /// structure cannot be recovered from the codes alone.
+    /// emits these because the codes are a flat concatenation with no in-band
+    /// row delimiter, so the row structure cannot be recovered from the codes
+    /// alone.
     pub code_offsets: Vec<O>,
 }
 
 /// Borrowed view of the data the decoder needs, consumed by
-/// [`crate::decompress`] and [`crate::decompress_into`].
+/// [`fn@crate::decompress`] and [`crate::decompress_into`].
 /// Downstream consumers deserializing from storage build this via struct
 /// literal — there is no constructor.
 #[derive(Copy, Clone, Debug)]
@@ -56,7 +57,7 @@ pub struct Parts<'a> {
 
 impl<O: Offset> Column<O> {
     /// Zero-copy view over this column's decode arrays. Pass directly to
-    /// [`crate::decompress`] or [`crate::decompress_into`]. `code_offsets` is
+    /// [`fn@crate::decompress`] or [`crate::decompress_into`]. `code_offsets` is
     /// compressor metadata and is not part of the view.
     #[inline]
     pub fn as_parts(&self) -> Parts<'_> {
diff --git a/src/decompress/fat.rs b/src/decompress/fat.rs
index 1a7dd50..eb8ab76 100644
--- a/src/decompress/fat.rs
+++ b/src/decompress/fat.rs
@@ -4,11 +4,9 @@
 //! "Fat" token table layout.
 //!
 //! Each token is materialized into a 16-byte-strided row, so a decode load
-//! addresses `data + code * 16` straight from the code — replacing the
-//! `code → entry → dict[offset]` dependent-load chain of the
-//! [`super::DecodeEntry`] layout with a single independent load. Costs
-//! `dict_tokens * 16` bytes of table; whether that pays is a cache-residency
-//! question the [`super::plan`] index decides per host.
+//! addresses `data + code * 16` straight from the code — a single independent
+//! load, with no `code → entry → dict[offset]` indirection. Costs
+//! `dict_tokens * 16` bytes of table, rebuilt once per decode call.
 //!
 //! Loop structure: a 16-byte over-copy fast region ([`super::scalar::copy16`])
 //! plus an exact, length-aware tail.
diff --git a/src/decompress/mod.rs b/src/decompress/mod.rs
index 272de0d..a052e69 100644
--- a/src/decompress/mod.rs
+++ b/src/decompress/mod.rs
@@ -583,8 +583,8 @@ mod tests {
     }
 
     /// Exercise the full decode width sweep against a corpus large enough to
-    /// drive the batched AVX-512 prefix, the scalar 16-byte remainder, and the
-    /// exact tail in a single call.
+    /// drive the 16-byte over-copy fast region and the exact, length-aware
+    /// tail in a single call.
     #[test]
     fn decompress_matches_input_across_widths() {
         let mut bytes = Vec::new();
diff --git a/src/lpm.rs b/src/lpm.rs
index 57a98bc..ecb4f01 100644
--- a/src/lpm.rs
+++ b/src/lpm.rs
@@ -266,7 +266,7 @@ impl LongestPrefixMatcher {
     /// prefix's length.
     ///
     /// Precondition: `!data.is_empty()` and the matcher contains every
-    /// single-byte token (always true after [`new`] or [`from_dictionary`]
+    /// single-byte token (always true after [`Self::new`] or [`Self::from_dictionary`]
     /// with a complete dictionary).
     #[inline]
     pub fn find_longest_match(&self, data: &[u8]) -> (Token, usize) {
diff --git a/src/parser.rs b/src/parser.rs
index ddb40d8..190f555 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -31,7 +31,7 @@ impl Parser {
     /// Train a dictionary against `bytes` / `offsets` and build the matching
     /// LPM. `offsets` has length `n + 1`. Returns [`Error::InvalidArg`] if
     /// `offsets` is empty or its last (maximum) offset cannot be represented in
-    /// `usize` or exceeds `bytes.len()` — see [`validate_offsets`]. The `cfg`
+    /// `usize` or exceeds `bytes.len()`. The `cfg`
     /// is valid by construction ([`Bits`](crate::Bits) /
     /// [`Threshold`](crate::Threshold)).
     pub fn train<O: Offset>(bytes: &[u8], offsets: &[O], cfg: Config) -> Result<Self, Error> {
@@ -52,8 +52,8 @@ impl Parser {
     /// Encode `bytes` / `offsets` using this parser. The dictionary is cloned
     /// into the returned [`Column`] so the column is fully decode-self-
     /// contained — the strings need not be the corpus the parser was trained
-    /// on. Returns [`Error::InvalidArg`] on invalid offsets — see
-    /// [`validate_offsets`].
+    /// on. Returns [`Error::InvalidArg`] if `offsets` is empty or its last
+    /// offset cannot be represented in `usize` or exceeds `bytes.len()`.
     pub fn parse<O: Offset>(&self, bytes: &[u8], offsets: &[O]) -> Result<Column<O>, Error> {
         validate_offsets(bytes, offsets)?;
         Ok(self.parse_unchecked(bytes, offsets))
@@ -81,9 +81,9 @@ impl Parser {
 
 /// Encode every string into a flat `Vec<u16>` of codes plus per-row
 /// `code_offsets`. Offset `[i]..[i + 1]` indexes the codes for row `i`. The
-/// offsets are compressor metadata — a token may span a row boundary, so the
-/// row structure cannot be recovered from the codes alone — and are not needed
-/// to decode the column as one flat stream.
+/// offsets are compressor metadata — the codes are a flat concatenation with no
+/// in-band row delimiter, so the row structure cannot be recovered from the
+/// codes alone — and are not needed to decode the column as one flat stream.
 pub(crate) fn encode_strings<O: Offset>(
     bytes: &[u8],
     offsets: &[O],