From 5540799a23b5e08582fd09f6933f3fe25acf882e Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Sat, 11 Apr 2026 17:10:51 +0100
Subject: [PATCH 1/9] Allow converting dictionaries with wider index types to R

---
 r/src/array_to_vector.cpp     | 23 ++++++++++++++++++++++-
 r/tests/testthat/test-Table.R | 16 ++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp
index 432b49503e1a..7af710bc7f32 100644
--- a/r/src/array_to_vector.cpp
+++ b/r/src/array_to_vector.cpp
@@ -595,7 +595,9 @@ class Converter_Dictionary : public Converter {
         case Type::UINT16:
         case Type::INT16:
         case Type::INT32:
-          // TODO: also add int64, uint32, uint64 downcasts, if possible
+        case Type::UINT32:
+        case Type::INT64:
+        case Type::UINT64:
           break;
         default:
           cpp11::stop("Cannot convert Dictionary Array of type `%s` to R",
@@ -612,6 +614,16 @@ class Converter_Dictionary : public Converter {
         dictionary_ = CreateEmptyArray(dict_type.value_type());
       }
     }
+
+    // R factors store their codes in 32-bit integers, so dictionary arrays with
+    // more levels than that cannot be represented safely.
+    if (dictionary_->length() > std::numeric_limits<int>::max()) {
+      const auto& dict_type = checked_cast<const DictionaryType&>(*chunked_array->type());
+      cpp11::stop(
+          "Cannot convert Dictionary Array of type `%s` to R: dictionary has "
+          "more levels than an R factor can represent",
+          dict_type.ToString().c_str());
+    }
   }
 
   SEXP Allocate(R_xlen_t n) const {
@@ -653,6 +665,15 @@ class Converter_Dictionary : public Converter {
       case Type::INT32:
         return Ingest_some_nulls_Impl<arrow::Int32Type>(data, array, start, n,
                                                         chunk_index);
+      case Type::UINT32:
+        return Ingest_some_nulls_Impl<arrow::UInt32Type>(data, array, start, n,
+                                                         chunk_index);
+      case Type::INT64:
+        return Ingest_some_nulls_Impl<arrow::Int64Type>(data, array, start, n,
+                                                        chunk_index);
+      case Type::UINT64:
+        return Ingest_some_nulls_Impl<arrow::UInt64Type>(data, array, start, n,
+                                                         chunk_index);
       default:
         break;
     }
diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R
index 1ca8832beb84..e404da1d029e 100644
--- a/r/tests/testthat/test-Table.R
+++ b/r/tests/testthat/test-Table.R
@@ -371,6 +371,22 @@ test_that("Can create table with specific dictionary types", {
   }
 })
 
+test_that("Table converts dictionary arrays with wider index types back to R", {
+  fact <- example_data[, "fct"]
+
+  tab_uint32 <- Table$create(fact, schema = schema(fct = dictionary(uint32(), utf8())))
+  expect_equal(tab_uint32$schema, schema(fct = dictionary(uint32(), utf8())))
+  expect_equal_data_frame(tab_uint32, fact)
+
+  tab_int64 <- Table$create(fact, schema = schema(fct = dictionary(int64(), utf8())))
+  expect_equal(tab_int64$schema, schema(fct = dictionary(int64(), utf8())))
+  expect_equal_data_frame(tab_int64, fact)
+
+  tab_uint64 <- Table$create(fact, schema = schema(fct = dictionary(uint64(), utf8())))
+  expect_equal(tab_uint64$schema, schema(fct = dictionary(uint64(), utf8())))
+  expect_equal_data_frame(tab_uint64, fact)
+})
+
 test_that("Table unifies dictionary on conversion back to R (ARROW-8374)", {
   b1 <- record_batch(f = factor(c("a"), levels = c("a", "b")))
   b2 <- record_batch(f = factor(c("c"), levels = c("c", "d")))

From 747210dceff6772bbde5103c0fe9f2bd5ec26fe3 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Sat, 11 Apr 2026 19:12:30 +0100
Subject: [PATCH 2/9] Add string_view type support to the R package

---
 cpp/src/arrow/util/converter.h    |  1 +
 r/NAMESPACE                       |  1 +
 r/R/arrowExports.R                | 21 +++++++-----
 r/R/dplyr-funcs-doc.R             |  2 +-
 r/R/type.R                        | 13 ++++++++
 r/man/data-type.Rd                |  3 ++
 r/man/read_json_arrow.Rd          |  2 +-
 r/man/schema.Rd                   |  2 +-
 r/src/array_to_vector.cpp         | 44 +++++++++++++++----------
 r/src/arrowExports.cpp            | 45 ++++++++++++++-----------
 r/src/datatype.cpp                |  5 +++
 r/src/r_to_arrow.cpp              | 55 +++++++++++++++++++++++++++++--
 r/tests/testthat/test-Table.R     |  7 ++++
 r/tests/testthat/test-data-type.R | 12 +++++++
 14 files changed, 161 insertions(+), 52 deletions(-)

diff --git a/cpp/src/arrow/util/converter.h b/cpp/src/arrow/util/converter.h
index c23d6ccd9886..3d1d07d53c2b 100644
--- a/cpp/src/arrow/util/converter.h
+++ b/cpp/src/arrow/util/converter.h
@@ -239,6 +239,7 @@ struct MakeConverterImpl {
       DICTIONARY_CASE(DoubleType);
       DICTIONARY_CASE(BinaryType);
       DICTIONARY_CASE(StringType);
+      DICTIONARY_CASE(StringViewType);
       DICTIONARY_CASE(FixedSizeBinaryType);
 #undef DICTIONARY_CASE
       default:
diff --git a/r/NAMESPACE b/r/NAMESPACE
index f42944fb58b5..320c9b378e3f 100644
--- a/r/NAMESPACE
+++ b/r/NAMESPACE
@@ -397,6 +397,7 @@ export(set_io_thread_count)
 export(show_exec_plan)
 export(starts_with)
 export(string)
+export(string_view)
 export(struct)
 export(time32)
 export(time64)
diff --git a/r/R/arrowExports.R b/r/R/arrowExports.R
index 52274d29f0d9..735a86964373 100644
--- a/r/R/arrowExports.R
+++ b/r/R/arrowExports.R
@@ -900,6 +900,10 @@ Utf8__initialize <- function() {
   .Call(`_arrow_Utf8__initialize`)
 }
 
+StringView__initialize <- function() {
+  .Call(`_arrow_StringView__initialize`)
+}
+
 LargeUtf8__initialize <- function() {
   .Call(`_arrow_LargeUtf8__initialize`)
 }
@@ -1248,14 +1252,6 @@ Field__Equals <- function(field, other, check_metadata) {
   .Call(`_arrow_Field__Equals`, field, other, check_metadata)
 }
 
-Field__nullable <- function(field) {
-  .Call(`_arrow_Field__nullable`, field)
-}
-
-Field__type <- function(field) {
-  .Call(`_arrow_Field__type`, field)
-}
-
 Field__HasMetadata <- function(field) {
   .Call(`_arrow_Field__HasMetadata`, field)
 }
@@ -1272,6 +1268,14 @@ Field__RemoveMetadata <- function(field) {
   .Call(`_arrow_Field__RemoveMetadata`, field)
 }
 
+Field__nullable <- function(field) {
+  .Call(`_arrow_Field__nullable`, field)
+}
+
+Field__type <- function(field) {
+  .Call(`_arrow_Field__type`, field)
+}
+
 fs___FileInfo__type <- function(x) {
   .Call(`_arrow_fs___FileInfo__type`, x)
 }
@@ -2199,4 +2203,3 @@ SetIOThreadPoolCapacity <- function(threads) {
 Array__infer_type <- function(x) {
   .Call(`_arrow_Array__infer_type`, x)
 }
-
diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R
index f7ca29833c81..176181a09bbb 100644
--- a/r/R/dplyr-funcs-doc.R
+++ b/r/R/dplyr-funcs-doc.R
@@ -84,7 +84,7 @@
 #' Functions can be called either as `pkg::fun()` or just `fun()`, i.e. both
 #' `str_sub()` and `stringr::str_sub()` work.
 #'
-#' In addition to these functions, you can call any of Arrow's 281 compute
+#' In addition to these functions, you can call any of Arrow's 253 compute
 #' functions directly. Arrow has many functions that don't map to an existing R
 #' function. In other cases where there is an R function mapping, you can still
 #' call the Arrow function directly if you don't want the adaptations that the R
diff --git a/r/R/type.R b/r/R/type.R
index 27cb0afe3db6..b370db82d0cc 100644
--- a/r/R/type.R
+++ b/r/R/type.R
@@ -203,6 +203,13 @@ Utf8 <- R6Class(
     code = function(namespace = FALSE) call2("utf8", .ns = if (namespace) "arrow")
   )
 )
+StringView <- R6Class(
+  "StringView",
+  inherit = DataType,
+  public = list(
+    code = function(namespace = FALSE) call2("string_view", .ns = if (namespace) "arrow")
+  )
+)
 LargeUtf8 <- R6Class(
   "LargeUtf8",
   inherit = DataType,
@@ -505,6 +512,10 @@ bool <- boolean
 #' @export
 utf8 <- function() Utf8__initialize()
 
+#' @rdname data-type
+#' @export
+string_view <- function() StringView__initialize()
+
 #' @rdname data-type
 #' @export
 large_utf8 <- function() LargeUtf8__initialize()
@@ -806,6 +817,8 @@ canonical_type_str <- function(type_str) {
     boolean = "bool",
     bool = "bool",
     utf8 = "string",
+    utf8_view = "string_view",
+    string_view = "string_view",
     large_utf8 = "large_string",
     large_string = "large_string",
     binary = "binary",
diff --git a/r/man/data-type.Rd b/r/man/data-type.Rd
index aa11c222bc55..ce2a6e4e7583 100644
--- a/r/man/data-type.Rd
+++ b/r/man/data-type.Rd
@@ -18,6 +18,7 @@
 \alias{boolean}
 \alias{bool}
 \alias{utf8}
+\alias{string_view}
 \alias{large_utf8}
 \alias{binary}
 \alias{large_binary}
@@ -76,6 +77,8 @@ bool()
 
 utf8()
 
+string_view()
+
 large_utf8()
 
 binary()
diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd
index b809a63bcc6f..abf6b8fc44a8 100644
--- a/r/man/read_json_arrow.Rd
+++ b/r/man/read_json_arrow.Rd
@@ -54,7 +54,7 @@ If \code{schema} is not provided, Arrow data types are inferred from the data:
 \item JSON numbers convert to \code{\link[=int64]{int64()}}, falling back to \code{\link[=float64]{float64()}} if a non-integer is encountered.
 \item JSON strings of the kind "YYYY-MM-DD" and "YYYY-MM-DD hh:mm:ss" convert to \code{\link[=timestamp]{timestamp(unit = "s")}},
 falling back to \code{\link[=utf8]{utf8()}} if a conversion error occurs.
-\item JSON arrays convert to a \code{\link[=list_of]{list_of()}} type, and inference proceeds recursively on the JSON arrays' values.
+\item JSON arrays convert to a \code{\link[vctrs:list_of]{vctrs::list_of()}} type, and inference proceeds recursively on the JSON arrays' values.
 \item Nested JSON objects convert to a \code{\link[=struct]{struct()}} type, and inference proceeds recursively on the JSON objects' values.
 }
 
diff --git a/r/man/schema.Rd b/r/man/schema.Rd
index 65ab2eea0d27..ff77a05d84aa 100644
--- a/r/man/schema.Rd
+++ b/r/man/schema.Rd
@@ -7,7 +7,7 @@
 schema(...)
 }
 \arguments{
-\item{...}{\link[=field]{fields}, field name/\link[=data-type]{data type} pairs (or a list of), or object from which to extract
+\item{...}{\link[vctrs:fields]{fields}, field name/\link[=data-type]{data type} pairs (or a list of), or object from which to extract
 a schema}
 }
 \description{
diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp
index 7af710bc7f32..bad234eb1120 100644
--- a/r/src/array_to_vector.cpp
+++ b/r/src/array_to_vector.cpp
@@ -290,26 +290,29 @@ struct Converter_String : public Converter {
 
   Status Ingest_some_nulls(SEXP data, const std::shared_ptr<arrow::Array>& array,
                            R_xlen_t start, R_xlen_t n, size_t chunk_index) const {
-    auto p_offset = array->data()->GetValues<int32_t>(1);
-    if (!p_offset) {
-      return Status::Invalid("Invalid offset buffer");
-    }
-    auto p_strings = array->data()->GetValues<char>(2, *p_offset);
-    if (!p_strings) {
-      // There is an offset buffer, but the data buffer is null
-      // There is at least one value in the array and not all the values are null
-      // That means all values are either empty strings or nulls so there is nothing to do
-
-      if (array->null_count()) {
-        arrow::internal::BitmapReader null_reader(array->null_bitmap_data(),
-                                                  array->offset(), n);
-        for (int i = 0; i < n; i++, null_reader.Next()) {
-          if (null_reader.IsNotSet()) {
-            SET_STRING_ELT(data, start + i, NA_STRING);
+    if constexpr (!std::is_same_v<StringArrayType, arrow::StringViewArray>) {
+      auto p_offset = array->data()->GetValues<int32_t>(1);
+      if (!p_offset) {
+        return Status::Invalid("Invalid offset buffer");
+      }
+      auto p_strings = array->data()->GetValues<char>(2, *p_offset);
+      if (!p_strings) {
+        // There is an offset buffer, but the data buffer is null
+        // There is at least one value in the array and not all the values are null
+        // That means all values are either empty strings or nulls so there is nothing to
+        // do
+
+        if (array->null_count()) {
+          arrow::internal::BitmapReader null_reader(array->null_bitmap_data(),
+                                                    array->offset(), n);
+          for (int i = 0; i < n; i++, null_reader.Next()) {
+            if (null_reader.IsNotSet()) {
+              SET_STRING_ELT(data, start + i, NA_STRING);
+            }
           }
         }
+        return Status::OK();
       }
-      return Status::OK();
     }
 
     StringArrayType* string_array = static_cast<StringArrayType*>(array.get());
@@ -725,7 +728,8 @@ class Converter_Dictionary : public Converter {
     // TODO (npr): this coercion should be optional, "dictionariesAsFactors" ;)
     // Alternative: preserve the logical type of the dictionary values
     // (e.g. if dict is timestamp, return a POSIXt R vector, not factor)
-    if (dictionary_->type_id() != Type::STRING) {
+    if (dictionary_->type_id() != Type::STRING &&
+        dictionary_->type_id() != Type::STRING_VIEW) {
       cpp11::safe[Rf_warning]("Coercing dictionary values to R character factor levels");
     }
 
@@ -1262,6 +1266,10 @@ std::shared_ptr<Converter> Converter::Make(
       return std::make_shared<arrow::r::Converter_String<arrow::LargeStringArray>>(
           chunked_array);
 
+    case Type::STRING_VIEW:
+      return std::make_shared<arrow::r::Converter_String<arrow::StringViewArray>>(
+          chunked_array);
+
     case Type::DICTIONARY:
       return std::make_shared<arrow::r::Converter_Dictionary>(chunked_array);
 
diff --git a/r/src/arrowExports.cpp b/r/src/arrowExports.cpp
index 5482c8679f68..4f8b54ceb339 100644
--- a/r/src/arrowExports.cpp
+++ b/r/src/arrowExports.cpp
@@ -2511,6 +2511,13 @@ BEGIN_CPP11
 END_CPP11
 }
 // datatype.cpp
+std::shared_ptr<arrow::DataType> StringView__initialize();
+extern "C" SEXP _arrow_StringView__initialize(){
+BEGIN_CPP11
+	return cpp11::as_sexp(StringView__initialize());
+END_CPP11
+}
+// datatype.cpp
 std::shared_ptr<arrow::DataType> LargeUtf8__initialize();
 extern "C" SEXP _arrow_LargeUtf8__initialize(){
 BEGIN_CPP11
@@ -3238,22 +3245,6 @@ BEGIN_CPP11
 END_CPP11
 }
 // field.cpp
-bool Field__nullable(const std::shared_ptr<arrow::Field>& field);
-extern "C" SEXP _arrow_Field__nullable(SEXP field_sexp){
-BEGIN_CPP11
-	arrow::r::Input<const std::shared_ptr<arrow::Field>&>::type field(field_sexp);
-	return cpp11::as_sexp(Field__nullable(field));
-END_CPP11
-}
-// field.cpp
-std::shared_ptr<arrow::DataType> Field__type(const std::shared_ptr<arrow::Field>& field);
-extern "C" SEXP _arrow_Field__type(SEXP field_sexp){
-BEGIN_CPP11
-	arrow::r::Input<const std::shared_ptr<arrow::Field>&>::type field(field_sexp);
-	return cpp11::as_sexp(Field__type(field));
-END_CPP11
-}
-// field.cpp
 bool Field__HasMetadata(const std::shared_ptr<arrow::Field>& field);
 extern "C" SEXP _arrow_Field__HasMetadata(SEXP field_sexp){
 BEGIN_CPP11
@@ -3286,6 +3277,22 @@ BEGIN_CPP11
 	return cpp11::as_sexp(Field__RemoveMetadata(field));
 END_CPP11
 }
+// field.cpp
+bool Field__nullable(const std::shared_ptr<arrow::Field>& field);
+extern "C" SEXP _arrow_Field__nullable(SEXP field_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<arrow::Field>&>::type field(field_sexp);
+	return cpp11::as_sexp(Field__nullable(field));
+END_CPP11
+}
+// field.cpp
+std::shared_ptr<arrow::DataType> Field__type(const std::shared_ptr<arrow::Field>& field);
+extern "C" SEXP _arrow_Field__type(SEXP field_sexp){
+BEGIN_CPP11
+	arrow::r::Input<const std::shared_ptr<arrow::Field>&>::type field(field_sexp);
+	return cpp11::as_sexp(Field__type(field));
+END_CPP11
+}
 // filesystem.cpp
 fs::FileType fs___FileInfo__type(const std::shared_ptr<fs::FileInfo>& x);
 extern "C" SEXP _arrow_fs___FileInfo__type(SEXP x_sexp){
@@ -5967,6 +5974,7 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_Float64__initialize", (DL_FUNC) &_arrow_Float64__initialize, 0}, 
 		{ "_arrow_Boolean__initialize", (DL_FUNC) &_arrow_Boolean__initialize, 0}, 
 		{ "_arrow_Utf8__initialize", (DL_FUNC) &_arrow_Utf8__initialize, 0}, 
+		{ "_arrow_StringView__initialize", (DL_FUNC) &_arrow_StringView__initialize, 0}, 
 		{ "_arrow_LargeUtf8__initialize", (DL_FUNC) &_arrow_LargeUtf8__initialize, 0}, 
 		{ "_arrow_Binary__initialize", (DL_FUNC) &_arrow_Binary__initialize, 0}, 
 		{ "_arrow_LargeBinary__initialize", (DL_FUNC) &_arrow_LargeBinary__initialize, 0}, 
@@ -6054,12 +6062,12 @@ static const R_CallMethodDef CallEntries[] = {
 		{ "_arrow_Field__ToString", (DL_FUNC) &_arrow_Field__ToString, 1}, 
 		{ "_arrow_Field__name", (DL_FUNC) &_arrow_Field__name, 1}, 
 		{ "_arrow_Field__Equals", (DL_FUNC) &_arrow_Field__Equals, 3}, 
-		{ "_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable, 1}, 
-		{ "_arrow_Field__type", (DL_FUNC) &_arrow_Field__type, 1}, 
 		{ "_arrow_Field__HasMetadata", (DL_FUNC) &_arrow_Field__HasMetadata, 1}, 
 		{ "_arrow_Field__metadata", (DL_FUNC) &_arrow_Field__metadata, 1}, 
 		{ "_arrow_Field__WithMetadata", (DL_FUNC) &_arrow_Field__WithMetadata, 2}, 
 		{ "_arrow_Field__RemoveMetadata", (DL_FUNC) &_arrow_Field__RemoveMetadata, 1}, 
+		{ "_arrow_Field__nullable", (DL_FUNC) &_arrow_Field__nullable, 1}, 
+		{ "_arrow_Field__type", (DL_FUNC) &_arrow_Field__type, 1}, 
 		{ "_arrow_fs___FileInfo__type", (DL_FUNC) &_arrow_fs___FileInfo__type, 1}, 
 		{ "_arrow_fs___FileInfo__set_type", (DL_FUNC) &_arrow_fs___FileInfo__set_type, 2}, 
 		{ "_arrow_fs___FileInfo__path", (DL_FUNC) &_arrow_fs___FileInfo__path, 1}, 
@@ -6303,4 +6311,3 @@ extern "C" void R_init_arrow(DllInfo* dll){
   _arrow_compute__Initialize();
 }
 
-
diff --git a/r/src/datatype.cpp b/r/src/datatype.cpp
index 3360159c58e6..0346332c0dd6 100644
--- a/r/src/datatype.cpp
+++ b/r/src/datatype.cpp
@@ -57,6 +57,8 @@ const char* r6_class_name<arrow::DataType>::get(
 
     case Type::STRING:
       return "Utf8";
+    case Type::STRING_VIEW:
+      return "StringView";
     case Type::LARGE_STRING:
       return "LargeUtf8";
 
@@ -165,6 +167,9 @@ std::shared_ptr<arrow::DataType> Boolean__initialize() { return arrow::boolean()
 // [[arrow::export]]
 std::shared_ptr<arrow::DataType> Utf8__initialize() { return arrow::utf8(); }
 
+// [[arrow::export]]
+std::shared_ptr<arrow::DataType> StringView__initialize() { return arrow::utf8_view(); }
+
 // [[arrow::export]]
 std::shared_ptr<arrow::DataType> LargeUtf8__initialize() { return arrow::large_utf8(); }
 
diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp
index 45d68043af5a..20f45e00361b 100644
--- a/r/src/r_to_arrow.cpp
+++ b/r/src/r_to_arrow.cpp
@@ -910,6 +910,49 @@ class RPrimitiveConverter<T, enable_if_string_like<T>>
   }
 };
 
+template <typename T>
+class RPrimitiveConverter<T, enable_if_string_view<T>>
+    : public PrimitiveConverter<T, RConverter> {
+ public:
+  Status Extend(SEXP x, int64_t size, int64_t offset = 0) override {
+    RVectorType rtype = GetVectorType(x);
+    if (rtype != STRING) {
+      return Status::Invalid("Expecting a character vector");
+    }
+    return UnsafeAppendUtf8Strings(arrow::r::utf8_strings(x), size, offset);
+  }
+
+  void DelayedExtend(SEXP values, int64_t size, RTasks& tasks) override {
+    auto task = [this, values, size]() { return this->Extend(values, size); };
+    tasks.Append(false, std::move(task));
+  }
+
+ private:
+  Status UnsafeAppendUtf8Strings(const cpp11::strings& s, int64_t size, int64_t offset) {
+    RETURN_NOT_OK(this->primitive_builder_->Reserve(s.size()));
+    const SEXP* p_strings = reinterpret_cast<const SEXP*>(DATAPTR_RO(s));
+
+    int64_t total_length = 0;
+    for (R_xlen_t i = offset; i < size; i++, ++p_strings) {
+      SEXP si = *p_strings;
+      total_length += si == NA_STRING ? 0 : LENGTH(si);
+    }
+    RETURN_NOT_OK(this->primitive_builder_->ReserveData(total_length));
+
+    p_strings = reinterpret_cast<const SEXP*>(DATAPTR_RO(s));
+    for (R_xlen_t i = offset; i < size; i++, ++p_strings) {
+      SEXP si = *p_strings;
+      if (si == NA_STRING) {
+        this->primitive_builder_->UnsafeAppendNull();
+      } else {
+        this->primitive_builder_->UnsafeAppend(CHAR(si), LENGTH(si));
+      }
+    }
+
+    return Status::OK();
+  }
+};
+
 template <typename T>
 class RPrimitiveConverter<T, enable_if_t<is_duration_type<T>::value>>
     : public PrimitiveConverter<T, RConverter> {
@@ -1029,8 +1072,8 @@ class RDictionaryConverter<ValueType, enable_if_has_string_view<ValueType>>
 
     // first we need to handle the levels
     SEXP levels = Rf_getAttrib(x, R_LevelsSymbol);
-    auto memo_chunked_chunked_array =
-        arrow::r::vec_to_arrow_ChunkedArray(levels, utf8(), false);
+    auto memo_chunked_chunked_array = arrow::r::vec_to_arrow_ChunkedArray(
+        levels, this->dict_type_->value_type(), false);
     for (const auto& chunk : memo_chunked_chunked_array->chunks()) {
       RETURN_NOT_OK(this->value_builder_->InsertMemoValues(*chunk));
     }
@@ -1062,7 +1105,13 @@ struct RConverterTrait<
 };
 
 template <typename T>
-struct RConverterTrait<T, enable_if_binary_view_like<T>> {
+struct RConverterTrait<T, enable_if_string_view<T>> {
+  using type = RPrimitiveConverter<T>;
+};
+
+template <typename T>
+struct RConverterTrait<T, enable_if_t<is_binary_view_like_type<T>::value &&
+                                      !is_string_view_type<T>::value>> {
   // not implemented
 };
 
diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R
index e404da1d029e..c43d20f8fc63 100644
--- a/r/tests/testthat/test-Table.R
+++ b/r/tests/testthat/test-Table.R
@@ -387,6 +387,13 @@ test_that("Table converts dictionary arrays with wider index types back to R", {
   expect_equal_data_frame(tab_uint64, fact)
 })
 
+test_that("Table converts dictionary arrays with string_view values", {
+  expected <- data.frame(foo = factor(c("x", "y", "x")))
+  tab <- Table$create(expected, schema = schema(foo = dictionary(uint32(), string_view())))
+
+  expect_equal_data_frame(tab, expected)
+})
+
 test_that("Table unifies dictionary on conversion back to R (ARROW-8374)", {
   b1 <- record_batch(f = factor(c("a"), levels = c("a", "b")))
   b2 <- record_batch(f = factor(c("c"), levels = c("c", "d")))
diff --git a/r/tests/testthat/test-data-type.R b/r/tests/testthat/test-data-type.R
index fa2e5bcd6e8d..44c8c67f4a96 100644
--- a/r/tests/testthat/test-data-type.R
+++ b/r/tests/testthat/test-data-type.R
@@ -163,6 +163,17 @@ test_that("utf8 type works as expected", {
   expect_equal(x$fields(), list())
 })
 
+test_that("string_view type works as expected", {
+  x <- string_view()
+  expect_equal(x$id, Type$STRING_VIEW)
+  expect_equal(x$name, "utf8_view")
+  expect_equal(x$ToString(), "string_view")
+  expect_true(x == x)
+  expect_false(x == null())
+  expect_equal(x$num_fields, 0L)
+  expect_equal(x$fields(), list())
+})
+
 test_that("date types work as expected", {
   x <- date32()
   expect_equal(x$id, Type$DATE32)
@@ -556,6 +567,7 @@ test_that("DataType$code()", {
 
   expect_code_roundtrip(boolean())
   expect_code_roundtrip(utf8())
+  expect_code_roundtrip(string_view())
   expect_code_roundtrip(large_utf8())
 
   expect_code_roundtrip(binary())

From 2ce9621a5fe79e2df5e9841c5a6b6dfde717d98a Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Tue, 5 May 2026 11:41:18 -0400
Subject: [PATCH 3/9] revert unwanted docs change

---
 r/R/dplyr-funcs-doc.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/R/dplyr-funcs-doc.R b/r/R/dplyr-funcs-doc.R
index 176181a09bbb..f7ca29833c81 100644
--- a/r/R/dplyr-funcs-doc.R
+++ b/r/R/dplyr-funcs-doc.R
@@ -84,7 +84,7 @@
 #' Functions can be called either as `pkg::fun()` or just `fun()`, i.e. both
 #' `str_sub()` and `stringr::str_sub()` work.
 #'
-#' In addition to these functions, you can call any of Arrow's 253 compute
+#' In addition to these functions, you can call any of Arrow's 281 compute
 #' functions directly. Arrow has many functions that don't map to an existing R
 #' function. In other cases where there is an R function mapping, you can still
 #' call the Arrow function directly if you don't want the adaptations that the R

From cde21b51e38a983e5c699ab8e5ff8add0483b305 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Tue, 5 May 2026 11:41:36 -0400
Subject: [PATCH 4/9] Add comment explaining the StringViewArray fast-path skip

---
 r/src/array_to_vector.cpp | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/r/src/array_to_vector.cpp b/r/src/array_to_vector.cpp
index bad234eb1120..4b157fc61ac9 100644
--- a/r/src/array_to_vector.cpp
+++ b/r/src/array_to_vector.cpp
@@ -290,6 +290,8 @@ struct Converter_String : public Converter {
 
   Status Ingest_some_nulls(SEXP data, const std::shared_ptr<arrow::Array>& array,
                            R_xlen_t start, R_xlen_t n, size_t chunk_index) const {
+    // StringViewArray uses a different memory layout (views + data buffers) rather
+    // than offsets, so skip the offset-based fast path and fall through to GetString().
     if constexpr (!std::is_same_v<StringArrayType, arrow::StringViewArray>) {
       auto p_offset = array->data()->GetValues<int32_t>(1);
       if (!p_offset) {

From bb711a774ee3886a22990f4e01beff0fdecacba8 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Tue, 5 May 2026 11:42:36 -0400
Subject: [PATCH 5/9] Expand string_view tests (NAs, empty strings, Tables)

---
 r/tests/testthat/test-Array.R |  8 ++++++++
 r/tests/testthat/test-Table.R | 15 +++++++++++++++
 2 files changed, 23 insertions(+)

diff --git a/r/tests/testthat/test-Array.R b/r/tests/testthat/test-Array.R
index 8520160d1255..b0bfc9e53564 100644
--- a/r/tests/testthat/test-Array.R
+++ b/r/tests/testthat/test-Array.R
@@ -203,6 +203,14 @@ test_that("Array supports character vectors (ARROW-3339)", {
   # with NA
   expect_array_roundtrip(c("itsy", NA, "spider"), utf8())
   expect_array_roundtrip(c("itsy", NA, "spider"), large_utf8(), as = large_utf8())
+
+  # string_view
+  expect_array_roundtrip(c("itsy", "bitsy", "spider"), string_view(), as = string_view())
+  expect_array_roundtrip(c("itsy", NA, "spider"), string_view(), as = string_view())
+
+  # string_view with empty strings
+  expect_array_roundtrip(c("", "bitsy", ""), string_view(), as = string_view())
+  expect_array_roundtrip(c("", NA, ""), string_view(), as = string_view())
 })
 
 test_that("Character vectors > 2GB become large_utf8", {
diff --git a/r/tests/testthat/test-Table.R b/r/tests/testthat/test-Table.R
index c43d20f8fc63..01f204539b29 100644
--- a/r/tests/testthat/test-Table.R
+++ b/r/tests/testthat/test-Table.R
@@ -390,8 +390,23 @@ test_that("Table converts dictionary arrays with wider index types back to R", {
 test_that("Table converts dictionary arrays with string_view values", {
   expected <- data.frame(foo = factor(c("x", "y", "x")))
   tab <- Table$create(expected, schema = schema(foo = dictionary(uint32(), string_view())))
+  expect_equal_data_frame(tab, expected)
+
+  # with NAs
+  expected_na <- data.frame(foo = factor(c("x", NA, "x")))
+  tab_na <- Table$create(expected_na, schema = schema(foo = dictionary(uint32(), string_view())))
+  expect_equal_data_frame(tab_na, expected_na)
+})
 
+test_that("Table round-trips string_view columns", {
+  expected <- data.frame(x = c("hello", "world", ""))
+  tab <- Table$create(expected, schema = schema(x = string_view()))
   expect_equal_data_frame(tab, expected)
+
+  # with NAs
+  expected_na <- data.frame(x = c("hello", NA, ""))
+  tab_na <- Table$create(expected_na, schema = schema(x = string_view()))
+  expect_equal_data_frame(tab_na, expected_na)
 })
 
 test_that("Table unifies dictionary on conversion back to R (ARROW-8374)", {

From 0d77a6f5b568245ae57e32c83bcbef46fc4260e4 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Tue, 5 May 2026 13:05:36 -0400
Subject: [PATCH 6/9] Fix offset bug

---
 r/man/CsvReadOptions.Rd        |   4 +-
 r/man/JsonFileFormat.Rd        |   4 -
 r/man/acero.Rd                 | 346 ++++++++++++++++-----------------
 r/man/arrow-package.Rd         |   1 +
 r/man/csv_convert_options.Rd   |   4 +-
 r/man/csv_read_options.Rd      |   8 +-
 r/man/enums.Rd                 |  33 ----
 r/man/read_json_arrow.Rd       |   2 +-
 r/man/reexports.Rd             |   4 +-
 r/man/schema.Rd                |   2 +-
 r/man/vctrs_extension_array.Rd |   4 +-
 r/src/r_to_arrow.cpp           |   6 +-
 12 files changed, 192 insertions(+), 226 deletions(-)

diff --git a/r/man/CsvReadOptions.Rd b/r/man/CsvReadOptions.Rd
index d4544cf829f2..320685b05c0d 100644
--- a/r/man/CsvReadOptions.Rd
+++ b/r/man/CsvReadOptions.Rd
@@ -69,9 +69,9 @@ generate a row of missing values (if \code{FALSE})?
 \item \code{check_utf8} Logical: check UTF8 validity of string columns? (default \code{TRUE})
 \item \code{null_values} character vector of recognized spellings for null values.
 Analogous to the \code{na.strings} argument to
-\code{\link[utils:read.table]{read.csv()}} or \code{na} in \code{\link[readr:read_delim]{readr::read_csv()}}.
+\code{\link[utils:read.csv]{read.csv()}} or \code{na} in \code{\link[readr:read_csv]{readr::read_csv()}}.
 \item \code{strings_can_be_null} Logical: can string / binary columns have
-null values? Similar to the \code{quoted_na} argument to \code{\link[readr:read_delim]{readr::read_csv()}}.
+null values? Similar to the \code{quoted_na} argument to \code{\link[readr:read_csv]{readr::read_csv()}}.
 (default \code{FALSE})
 \item \code{true_values} character vector of recognized spellings for \code{TRUE} values
 \item \code{false_values} character vector of recognized spellings for \code{FALSE} values
diff --git a/r/man/JsonFileFormat.Rd b/r/man/JsonFileFormat.Rd
index a0edb50bb5ec..79a2f22efd7d 100644
--- a/r/man/JsonFileFormat.Rd
+++ b/r/man/JsonFileFormat.Rd
@@ -30,10 +30,6 @@ characters? (default \code{FALSE})
 }
 }
 
-\examples{
-\dontshow{if (arrow_with_dataset()) withAutoprint(\{ # examplesIf}
-\dontshow{\}) # examplesIf}
-}
 \seealso{
 \link{FileFormat}
 }
diff --git a/r/man/acero.Rd b/r/man/acero.Rd
index 2e8b1fba1e89..6d4c27b18d5c 100644
--- a/r/man/acero.Rd
+++ b/r/man/acero.Rd
@@ -23,44 +23,44 @@ the query on the data. To run the query, call either \code{compute()},
 which returns an \code{arrow} \link{Table}, or \code{collect()}, which pulls the resulting
 Table into an R \code{tibble}.
 \itemize{
-\item \code{\link[dplyr:filter-joins]{anti_join()}}: the \code{copy} argument is ignored
+\item \code{\link[dplyr:anti_join]{anti_join()}}: the \code{copy} argument is ignored
 \item \code{\link[dplyr:arrange]{arrange()}}
-\item \code{\link[dplyr:compute]{collapse()}}
-\item \code{\link[dplyr:compute]{collect()}}
+\item \code{\link[dplyr:collapse]{collapse()}}
+\item \code{\link[dplyr:collect]{collect()}}
 \item \code{\link[dplyr:compute]{compute()}}
 \item \code{\link[dplyr:count]{count()}}
 \item \code{\link[dplyr:distinct]{distinct()}}: \code{.keep_all = TRUE} returns a non-missing value if present, only returning missing values if all are missing.
 \item \code{\link[dplyr:explain]{explain()}}
 \item \code{\link[dplyr:filter]{filter()}}
-\item \code{\link[dplyr:filter]{filter_out()}}
-\item \code{\link[dplyr:mutate-joins]{full_join()}}: the \code{copy} argument is ignored
+\item \code{\link[dplyr:filter_out]{filter_out()}}
+\item \code{\link[dplyr:full_join]{full_join()}}: the \code{copy} argument is ignored
 \item \code{\link[dplyr:glimpse]{glimpse()}}
 \item \code{\link[dplyr:group_by]{group_by()}}
 \item \code{\link[dplyr:group_by_drop_default]{group_by_drop_default()}}
-\item \code{\link[dplyr:group_data]{group_vars()}}
-\item \code{\link[dplyr:group_data]{groups()}}
-\item \code{\link[dplyr:mutate-joins]{inner_join()}}: the \code{copy} argument is ignored
-\item \code{\link[dplyr:mutate-joins]{left_join()}}: the \code{copy} argument is ignored
+\item \code{\link[dplyr:group_vars]{group_vars()}}
+\item \code{\link[dplyr:groups]{groups()}}
+\item \code{\link[dplyr:inner_join]{inner_join()}}: the \code{copy} argument is ignored
+\item \code{\link[dplyr:left_join]{left_join()}}: the \code{copy} argument is ignored
 \item \code{\link[dplyr:mutate]{mutate()}}
 \item \code{\link[dplyr:pull]{pull()}}: the \code{name} argument is not supported; returns an R vector by default but this behavior is deprecated and will return an Arrow \link{ChunkedArray} in a future release. Provide \code{as_vector = TRUE/FALSE} to control this behavior, or set \code{options(arrow.pull_as_vector)} globally.
 \item \code{\link[dplyr:relocate]{relocate()}}
 \item \code{\link[dplyr:rename]{rename()}}
-\item \code{\link[dplyr:rename]{rename_with()}}
-\item \code{\link[dplyr:mutate-joins]{right_join()}}: the \code{copy} argument is ignored
+\item \code{\link[dplyr:rename_with]{rename_with()}}
+\item \code{\link[dplyr:right_join]{right_join()}}: the \code{copy} argument is ignored
 \item \code{\link[dplyr:select]{select()}}
-\item \code{\link[dplyr:filter-joins]{semi_join()}}: the \code{copy} argument is ignored
-\item \code{\link[dplyr:explain]{show_query()}}
-\item \code{\link[dplyr:slice]{slice_head()}}: slicing within groups not supported; Arrow datasets do not have row order, so head is non-deterministic; \code{prop} only supported on queries where \code{nrow()} is knowable without evaluating
-\item \code{\link[dplyr:slice]{slice_max()}}: slicing within groups not supported; \code{with_ties = TRUE} (dplyr default) is not supported; \code{prop} only supported on queries where \code{nrow()} is knowable without evaluating
-\item \code{\link[dplyr:slice]{slice_min()}}: slicing within groups not supported; \code{with_ties = TRUE} (dplyr default) is not supported; \code{prop} only supported on queries where \code{nrow()} is knowable without evaluating
-\item \code{\link[dplyr:slice]{slice_sample()}}: slicing within groups not supported; \code{replace = TRUE} and the \code{weight_by} argument not supported; \code{n} only supported on queries where \code{nrow()} is knowable without evaluating
-\item \code{\link[dplyr:slice]{slice_tail()}}: slicing within groups not supported; Arrow datasets do not have row order, so tail is non-deterministic; \code{prop} only supported on queries where \code{nrow()} is knowable without evaluating
+\item \code{\link[dplyr:semi_join]{semi_join()}}: the \code{copy} argument is ignored
+\item \code{\link[dplyr:show_query]{show_query()}}
+\item \code{\link[dplyr:slice_head]{slice_head()}}: slicing within groups not supported; Arrow datasets do not have row order, so head is non-deterministic; \code{prop} only supported on queries where \code{nrow()} is knowable without evaluating
+\item \code{\link[dplyr:slice_max]{slice_max()}}: slicing within groups not supported; \code{with_ties = TRUE} (dplyr default) is not supported; \code{prop} only supported on queries where \code{nrow()} is knowable without evaluating
+\item \code{\link[dplyr:slice_min]{slice_min()}}: slicing within groups not supported; \code{with_ties = TRUE} (dplyr default) is not supported; \code{prop} only supported on queries where \code{nrow()} is knowable without evaluating
+\item \code{\link[dplyr:slice_sample]{slice_sample()}}: slicing within groups not supported; \code{replace = TRUE} and the \code{weight_by} argument not supported; \code{n} only supported on queries where \code{nrow()} is knowable without evaluating
+\item \code{\link[dplyr:slice_tail]{slice_tail()}}: slicing within groups not supported; Arrow datasets do not have row order, so tail is non-deterministic; \code{prop} only supported on queries where \code{nrow()} is knowable without evaluating
 \item \code{\link[dplyr:summarise]{summarise()}}: window functions not currently supported; arguments \code{.drop = FALSE} and \code{.groups = "rowwise"} not supported
-\item \code{\link[dplyr:count]{tally()}}
+\item \code{\link[dplyr:tally]{tally()}}
 \item \code{\link[dplyr:transmute]{transmute()}}
-\item \code{\link[dplyr:group_by]{ungroup()}}
-\item \code{\link[dplyr:setops]{union()}}
-\item \code{\link[dplyr:setops]{union_all()}}
+\item \code{\link[dplyr:ungroup]{ungroup()}}
+\item \code{\link[dplyr:union]{union()}}
+\item \code{\link[dplyr:union_all]{union_all()}}
 }
 }
 
@@ -72,7 +72,7 @@ can assume that the function works in Acero just as it does in R.
 Functions can be called either as \code{pkg::fun()} or just \code{fun()}, i.e. both
 \code{str_sub()} and \code{stringr::str_sub()} work.
 
-In addition to these functions, you can call any of Arrow's 281 compute
+In addition to these functions, you can call any of Arrow's 253 compute
 functions directly. Arrow has many functions that don't map to an existing R
 function. In other cases where there is an R function mapping, you can still
 call the Arrow function directly if you don't want the adaptations that the R
@@ -89,109 +89,109 @@ as \code{arrow_ascii_is_decimal}.
 
 \subsection{base}{
 \itemize{
-\item \code{\link[=!]{!}}
-\item \code{\link[=!=]{!=}}
-\item \code{\link[=\%\%]{\%\%}}
-\item \code{\link[=\%/\%]{\%/\%}}
-\item \code{\link[=\%in\%]{\%in\%}}
-\item \code{\link[=&]{&}}
-\item \code{\link[=*]{*}}
-\item \code{\link[=+]{+}}
-\item \code{\link[=-]{-}}
-\item \code{\link[=/]{/}}
-\item \code{\link[=<]{<}}
-\item \code{\link[=<=]{<=}}
-\item \code{\link[===]{==}}
-\item \code{\link[=>]{>}}
-\item \code{\link[=>=]{>=}}
-\item \code{\link[base:ISOdatetime]{ISOdate()}}
+\item \code{\link{!}}
+\item \code{\link{!=}}
+\item \code{\link{\%\%}}
+\item \code{\link{\%/\%}}
+\item \code{\link{\%in\%}}
+\item \code{\link{&}}
+\item \code{\link{*}}
+\item \code{\link{+}}
+\item \code{\link{-}}
+\item \code{\link{/}}
+\item \code{\link{<}}
+\item \code{\link{<=}}
+\item \code{\link{==}}
+\item \code{\link{>}}
+\item \code{\link{>=}}
+\item \code{\link[base:ISOdate]{ISOdate()}}
 \item \code{\link[base:ISOdatetime]{ISOdatetime()}}
-\item \code{\link[=^]{^}}
-\item \code{\link[base:MathFun]{abs()}}
-\item \code{\link[base:Trig]{acos()}}
-\item \code{\link[base:Hyperbolic]{acosh()}}
+\item \code{\link{^}}
+\item \code{\link[base:abs]{abs()}}
+\item \code{\link[base:acos]{acos()}}
+\item \code{\link[base:acosh]{acosh()}}
 \item \code{\link[base:all]{all()}}
 \item \code{\link[base:any]{any()}}
 \item \code{\link[base:as.Date]{as.Date()}}: Multiple \code{tryFormats} not supported in Arrow.
 Consider using the lubridate specialised parsing functions \code{ymd()}, \code{ymd()}, etc.
-\item \code{\link[base:character]{as.character()}}
-\item \code{\link[base:difftime]{as.difftime()}}: only supports \code{units = "secs"} (the default)
-\item \code{\link[base:double]{as.double()}}
-\item \code{\link[base:integer]{as.integer()}}
-\item \code{\link[base:logical]{as.logical()}}
-\item \code{\link[base:numeric]{as.numeric()}}
-\item \code{\link[base:Trig]{asin()}}
-\item \code{\link[base:Hyperbolic]{asinh()}}
-\item \code{\link[base:Trig]{atan()}}
-\item \code{\link[base:Hyperbolic]{atanh()}}
-\item \code{\link[base:Round]{ceiling()}}
-\item \code{\link[base:Trig]{cos()}}
-\item \code{\link[base:Hyperbolic]{cosh()}}
+\item \code{\link[base:as.character]{as.character()}}
+\item \code{\link[base:as.difftime]{as.difftime()}}: only supports \code{units = "secs"} (the default)
+\item \code{\link[base:as.double]{as.double()}}
+\item \code{\link[base:as.integer]{as.integer()}}
+\item \code{\link[base:as.logical]{as.logical()}}
+\item \code{\link[base:as.numeric]{as.numeric()}}
+\item \code{\link[base:asin]{asin()}}
+\item \code{\link[base:asinh]{asinh()}}
+\item \code{\link[base:atan]{atan()}}
+\item \code{\link[base:atanh]{atanh()}}
+\item \code{\link[base:ceiling]{ceiling()}}
+\item \code{\link[base:cos]{cos()}}
+\item \code{\link[base:cosh]{cosh()}}
 \item \code{\link[base:data.frame]{data.frame()}}: \code{row.names} and \code{check.rows} arguments not supported;
 \code{stringsAsFactors} must be \code{FALSE}
 \item \code{\link[base:difftime]{difftime()}}: only supports \code{units = "secs"} (the default);
 \code{tz} argument not supported
-\item \code{\link[base:startsWith]{endsWith()}}
-\item \code{\link[base:Log]{exp()}}
-\item \code{\link[base:Log]{expm1()}}
-\item \code{\link[base:Round]{floor()}}
+\item \code{\link[base:endsWith]{endsWith()}}
+\item \code{\link[base:exp]{exp()}}
+\item \code{\link[base:expm1]{expm1()}}
+\item \code{\link[base:floor]{floor()}}
 \item \code{\link[base:format]{format()}}
-\item \code{\link[base:grep]{grepl()}}
-\item \code{\link[base:grep]{gsub()}}
+\item \code{\link[base:grepl]{grepl()}}
+\item \code{\link[base:gsub]{gsub()}}
 \item \code{\link[base:ifelse]{ifelse()}}
-\item \code{\link[base:character]{is.character()}}
-\item \code{\link[base:double]{is.double()}}
-\item \code{\link[base:factor]{is.factor()}}
+\item \code{\link[base:is.character]{is.character()}}
+\item \code{\link[base:is.double]{is.double()}}
+\item \code{\link[base:is.factor]{is.factor()}}
 \item \code{\link[base:is.finite]{is.finite()}}
-\item \code{\link[base:is.finite]{is.infinite()}}
-\item \code{\link[base:integer]{is.integer()}}
-\item \code{\link[base:list]{is.list()}}
-\item \code{\link[base:logical]{is.logical()}}
-\item \code{\link[base:NA]{is.na()}}
-\item \code{\link[base:is.finite]{is.nan()}}
-\item \code{\link[base:numeric]{is.numeric()}}
-\item \code{\link[base:Log]{log()}}
-\item \code{\link[base:Log]{log10()}}
-\item \code{\link[base:Log]{log1p()}}
-\item \code{\link[base:Log]{log2()}}
-\item \code{\link[base:Log]{logb()}}
-\item \code{\link[base:Extremes]{max()}}
+\item \code{\link[base:is.infinite]{is.infinite()}}
+\item \code{\link[base:is.integer]{is.integer()}}
+\item \code{\link[base:is.list]{is.list()}}
+\item \code{\link[base:is.logical]{is.logical()}}
+\item \code{\link[base:is.na]{is.na()}}
+\item \code{\link[base:is.nan]{is.nan()}}
+\item \code{\link[base:is.numeric]{is.numeric()}}
+\item \code{\link[base:log]{log()}}
+\item \code{\link[base:log10]{log10()}}
+\item \code{\link[base:log1p]{log1p()}}
+\item \code{\link[base:log2]{log2()}}
+\item \code{\link[base:logb]{logb()}}
+\item \code{\link[base:max]{max()}}
 \item \code{\link[base:mean]{mean()}}
-\item \code{\link[base:Extremes]{min()}}
+\item \code{\link[base:min]{min()}}
 \item \code{\link[base:nchar]{nchar()}}: \code{allowNA = TRUE} and \code{keepNA = TRUE} not supported
 \item \code{\link[base:paste]{paste()}}: the \code{collapse} argument is not yet supported
-\item \code{\link[base:paste]{paste0()}}: the \code{collapse} argument is not yet supported
-\item \code{\link[base:Extremes]{pmax()}}
-\item \code{\link[base:Extremes]{pmin()}}
+\item \code{\link[base:paste0]{paste0()}}: the \code{collapse} argument is not yet supported
+\item \code{\link[base:pmax]{pmax()}}
+\item \code{\link[base:pmin]{pmin()}}
 \item \code{\link[base:prod]{prod()}}
-\item \code{\link[base:Round]{round()}}
+\item \code{\link[base:round]{round()}}
 \item \code{\link[base:sign]{sign()}}
-\item \code{\link[base:Trig]{sin()}}
-\item \code{\link[base:Hyperbolic]{sinh()}}
-\item \code{\link[base:MathFun]{sqrt()}}
+\item \code{\link[base:sin]{sin()}}
+\item \code{\link[base:sinh]{sinh()}}
+\item \code{\link[base:sqrt]{sqrt()}}
 \item \code{\link[base:startsWith]{startsWith()}}
-\item \code{\link[base:strptime]{strftime()}}
+\item \code{\link[base:strftime]{strftime()}}
 \item \code{\link[base:strptime]{strptime()}}: accepts a \code{unit} argument not present in the \code{base} function.
 Valid values are "s", "ms" (default), "us", "ns".
 \item \code{\link[base:strrep]{strrep()}}
 \item \code{\link[base:strsplit]{strsplit()}}
-\item \code{\link[base:grep]{sub()}}
+\item \code{\link[base:sub]{sub()}}
 \item \code{\link[base:substr]{substr()}}: \code{start} and \code{stop} must be length 1
-\item \code{\link[base:substr]{substring()}}
+\item \code{\link[base:substring]{substring()}}
 \item \code{\link[base:sum]{sum()}}
-\item \code{\link[base:Trig]{tan()}}
-\item \code{\link[base:Hyperbolic]{tanh()}}
-\item \code{\link[base:chartr]{tolower()}}
-\item \code{\link[base:chartr]{toupper()}}
-\item \code{\link[base:Round]{trunc()}}
-\item \code{\link[=|]{|}}
+\item \code{\link[base:tan]{tan()}}
+\item \code{\link[base:tanh]{tanh()}}
+\item \code{\link[base:tolower]{tolower()}}
+\item \code{\link[base:toupper]{toupper()}}
+\item \code{\link[base:trunc]{trunc()}}
+\item \code{\link{|}}
 }
 }
 
 \subsection{bit64}{
 \itemize{
-\item \code{\link[bit64:as.integer64.character]{as.integer64()}}
-\item \code{\link[bit64:bit64-package]{is.integer64()}}
+\item \code{\link[bit64:as.integer64]{as.integer64()}}
+\item \code{\link[bit64:is.integer64]{is.integer64()}}
 }
 }
 
@@ -199,25 +199,25 @@ Valid values are "s", "ms" (default), "us", "ns".
 \itemize{
 \item \code{\link[dplyr:across]{across()}}
 \item \code{\link[dplyr:between]{between()}}
-\item \code{\link[dplyr:case-and-replace-when]{case_when()}}: \code{.ptype} and \code{.size} arguments not supported
+\item \code{\link[dplyr:case_when]{case_when()}}: \code{.ptype} and \code{.size} arguments not supported
 \item \code{\link[dplyr:coalesce]{coalesce()}}
 \item \code{\link[dplyr:desc]{desc()}}
-\item \code{\link[dplyr:across]{if_all()}}
-\item \code{\link[dplyr:across]{if_any()}}
+\item \code{\link[dplyr:if_all]{if_all()}}
+\item \code{\link[dplyr:if_any]{if_any()}}
 \item \code{\link[dplyr:if_else]{if_else()}}
-\item \code{\link[dplyr:context]{n()}}
+\item \code{\link[dplyr:n]{n()}}
 \item \code{\link[dplyr:n_distinct]{n_distinct()}}
-\item \code{\link[dplyr:recode-and-replace-values]{recode_values()}}: \code{ptype} argument and \code{unmatched = "error"} not supported
-\item \code{\link[dplyr:recode-and-replace-values]{replace_values()}}
-\item \code{\link[dplyr:case-and-replace-when]{replace_when()}}
-\item \code{\link[dplyr:when-any-all]{when_all()}}
-\item \code{\link[dplyr:when-any-all]{when_any()}}
+\item \code{\link[dplyr:recode_values]{recode_values()}}: \code{ptype} argument and \code{unmatched = "error"} not supported
+\item \code{\link[dplyr:replace_values]{replace_values()}}
+\item \code{\link[dplyr:replace_when]{replace_when()}}
+\item \code{\link[dplyr:when_all]{when_all()}}
+\item \code{\link[dplyr:when_any]{when_any()}}
 }
 }
 
 \subsection{hms}{
 \itemize{
-\item \code{\link[hms:hms]{as_hms()}}: subsecond precision not supported for character input
+\item \code{\link[hms:as_hms]{as_hms()}}: subsecond precision not supported for character input
 \item \code{\link[hms:hms]{hms()}}: nanosecond times not supported
 }
 }
@@ -226,83 +226,83 @@ Valid values are "s", "ms" (default), "us", "ns".
 \itemize{
 \item \code{\link[lubridate:am]{am()}}
 \item \code{\link[lubridate:as_date]{as_date()}}
-\item \code{\link[lubridate:as_date]{as_datetime()}}
-\item \code{\link[lubridate:round_date]{ceiling_date()}}
+\item \code{\link[lubridate:as_datetime]{as_datetime()}}
+\item \code{\link[lubridate:ceiling_date]{ceiling_date()}}
 \item \code{\link[lubridate:date]{date()}}
 \item \code{\link[lubridate:date_decimal]{date_decimal()}}
 \item \code{\link[lubridate:day]{day()}}
-\item \code{\link[lubridate:duration]{ddays()}}
+\item \code{\link[lubridate:ddays]{ddays()}}
 \item \code{\link[lubridate:decimal_date]{decimal_date()}}
-\item \code{\link[lubridate:duration]{dhours()}}
-\item \code{\link[lubridate:duration]{dmicroseconds()}}
-\item \code{\link[lubridate:duration]{dmilliseconds()}}
-\item \code{\link[lubridate:duration]{dminutes()}}
-\item \code{\link[lubridate:duration]{dmonths()}}
-\item \code{\link[lubridate:ymd]{dmy()}}: \code{locale} argument not supported
-\item \code{\link[lubridate:ymd_hms]{dmy_h()}}: \code{locale} argument not supported
-\item \code{\link[lubridate:ymd_hms]{dmy_hm()}}: \code{locale} argument not supported
-\item \code{\link[lubridate:ymd_hms]{dmy_hms()}}: \code{locale} argument not supported
-\item \code{\link[lubridate:duration]{dnanoseconds()}}
-\item \code{\link[lubridate:duration]{dpicoseconds()}}: not supported
-\item \code{\link[lubridate:duration]{dseconds()}}
+\item \code{\link[lubridate:dhours]{dhours()}}
+\item \code{\link[lubridate:dmicroseconds]{dmicroseconds()}}
+\item \code{\link[lubridate:dmilliseconds]{dmilliseconds()}}
+\item \code{\link[lubridate:dminutes]{dminutes()}}
+\item \code{\link[lubridate:dmonths]{dmonths()}}
+\item \code{\link[lubridate:dmy]{dmy()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:dmy_h]{dmy_h()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:dmy_hm]{dmy_hm()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:dmy_hms]{dmy_hms()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:dnanoseconds]{dnanoseconds()}}
+\item \code{\link[lubridate:dpicoseconds]{dpicoseconds()}}: not supported
+\item \code{\link[lubridate:dseconds]{dseconds()}}
 \item \code{\link[lubridate:dst]{dst()}}
-\item \code{\link[lubridate:duration]{dweeks()}}
-\item \code{\link[lubridate:duration]{dyears()}}
-\item \code{\link[lubridate:ymd]{dym()}}: \code{locale} argument not supported
-\item \code{\link[lubridate:week]{epiweek()}}
-\item \code{\link[lubridate:year]{epiyear()}}
-\item \code{\link[lubridate:parse_date_time]{fast_strptime()}}: non-default values of \code{lt} and \code{cutoff_2000} not supported
-\item \code{\link[lubridate:round_date]{floor_date()}}
+\item \code{\link[lubridate:dweeks]{dweeks()}}
+\item \code{\link[lubridate:dyears]{dyears()}}
+\item \code{\link[lubridate:dym]{dym()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:epiweek]{epiweek()}}
+\item \code{\link[lubridate:epiyear]{epiyear()}}
+\item \code{\link[lubridate:fast_strptime]{fast_strptime()}}: non-default values of \code{lt} and \code{cutoff_2000} not supported
+\item \code{\link[lubridate:floor_date]{floor_date()}}
 \item \code{\link[lubridate:force_tz]{force_tz()}}: Timezone conversion from non-UTC timezone not supported;
 \code{roll_dst} values of 'error' and 'boundary' are supported for nonexistent times,
 \code{roll_dst} values of 'error', 'pre', and 'post' are supported for ambiguous times.
 \item \code{\link[lubridate:format_ISO8601]{format_ISO8601()}}
 \item \code{\link[lubridate:hour]{hour()}}
-\item \code{\link[lubridate:date_utils]{is.Date()}}
-\item \code{\link[lubridate:posix_utils]{is.POSIXct()}}
+\item \code{\link[lubridate:is.Date]{is.Date()}}
+\item \code{\link[lubridate:is.POSIXct]{is.POSIXct()}}
 \item \code{\link[lubridate:is.instant]{is.instant()}}
-\item \code{\link[lubridate:is.instant]{is.timepoint()}}
-\item \code{\link[lubridate:week]{isoweek()}}
-\item \code{\link[lubridate:year]{isoyear()}}
+\item \code{\link[lubridate:is.timepoint]{is.timepoint()}}
+\item \code{\link[lubridate:isoweek]{isoweek()}}
+\item \code{\link[lubridate:isoyear]{isoyear()}}
 \item \code{\link[lubridate:leap_year]{leap_year()}}
-\item \code{\link[lubridate:make_datetime]{make_date()}}
+\item \code{\link[lubridate:make_date]{make_date()}}
 \item \code{\link[lubridate:make_datetime]{make_datetime()}}: only supports UTC (default) timezone
 \item \code{\link[lubridate:make_difftime]{make_difftime()}}: only supports \code{units = "secs"} (the default);
 providing both \code{num} and \code{...} is not supported
-\item \code{\link[lubridate:day]{mday()}}
-\item \code{\link[lubridate:ymd]{mdy()}}: \code{locale} argument not supported
-\item \code{\link[lubridate:ymd_hms]{mdy_h()}}: \code{locale} argument not supported
-\item \code{\link[lubridate:ymd_hms]{mdy_hm()}}: \code{locale} argument not supported
-\item \code{\link[lubridate:ymd_hms]{mdy_hms()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:mday]{mday()}}
+\item \code{\link[lubridate:mdy]{mdy()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:mdy_h]{mdy_h()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:mdy_hm]{mdy_hm()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:mdy_hms]{mdy_hms()}}: \code{locale} argument not supported
 \item \code{\link[lubridate:minute]{minute()}}
 \item \code{\link[lubridate:month]{month()}}
-\item \code{\link[lubridate:ymd]{my()}}: \code{locale} argument not supported
-\item \code{\link[lubridate:ymd]{myd()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:my]{my()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:myd]{myd()}}: \code{locale} argument not supported
 \item \code{\link[lubridate:parse_date_time]{parse_date_time()}}: \code{quiet = FALSE} is not supported
 Available formats are H, I, j, M, S, U, w, W, y, Y, R, T.
 On Linux and OS X additionally a, A, b, B, Om, p, r are available.
-\item \code{\link[lubridate:am]{pm()}}
-\item \code{\link[lubridate:day]{qday()}}
+\item \code{\link[lubridate:pm]{pm()}}
+\item \code{\link[lubridate:qday]{qday()}}
 \item \code{\link[lubridate:quarter]{quarter()}}
 \item \code{\link[lubridate:round_date]{round_date()}}
 \item \code{\link[lubridate:second]{second()}}
-\item \code{\link[lubridate:quarter]{semester()}}
+\item \code{\link[lubridate:semester]{semester()}}
 \item \code{\link[lubridate:tz]{tz()}}
-\item \code{\link[lubridate:day]{wday()}}
+\item \code{\link[lubridate:wday]{wday()}}
 \item \code{\link[lubridate:week]{week()}}
 \item \code{\link[lubridate:with_tz]{with_tz()}}
-\item \code{\link[lubridate:day]{yday()}}
-\item \code{\link[lubridate:ymd]{ydm()}}: \code{locale} argument not supported
-\item \code{\link[lubridate:ymd_hms]{ydm_h()}}: \code{locale} argument not supported
-\item \code{\link[lubridate:ymd_hms]{ydm_hm()}}: \code{locale} argument not supported
-\item \code{\link[lubridate:ymd_hms]{ydm_hms()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:yday]{yday()}}
+\item \code{\link[lubridate:ydm]{ydm()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ydm_h]{ydm_h()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ydm_hm]{ydm_hm()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ydm_hms]{ydm_hms()}}: \code{locale} argument not supported
 \item \code{\link[lubridate:year]{year()}}
-\item \code{\link[lubridate:ymd]{ym()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ym]{ym()}}: \code{locale} argument not supported
 \item \code{\link[lubridate:ymd]{ymd()}}: \code{locale} argument not supported
-\item \code{\link[lubridate:ymd_hms]{ymd_h()}}: \code{locale} argument not supported
-\item \code{\link[lubridate:ymd_hms]{ymd_hm()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ymd_h]{ymd_h()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:ymd_hm]{ymd_hm()}}: \code{locale} argument not supported
 \item \code{\link[lubridate:ymd_hms]{ymd_hms()}}: \code{locale} argument not supported
-\item \code{\link[lubridate:ymd]{yq()}}: \code{locale} argument not supported
+\item \code{\link[lubridate:yq]{yq()}}: \code{locale} argument not supported
 }
 }
 
@@ -314,11 +314,11 @@ On Linux and OS X additionally a, A, b, B, Om, p, r are available.
 
 \subsection{rlang}{
 \itemize{
-\item \code{\link[rlang:type-predicates]{is_character()}}
-\item \code{\link[rlang:type-predicates]{is_double()}}
-\item \code{\link[rlang:type-predicates]{is_integer()}}
-\item \code{\link[rlang:type-predicates]{is_list()}}
-\item \code{\link[rlang:type-predicates]{is_logical()}}
+\item \code{\link[rlang:is_character]{is_character()}}
+\item \code{\link[rlang:is_double]{is_double()}}
+\item \code{\link[rlang:is_integer]{is_integer()}}
+\item \code{\link[rlang:is_list]{is_list()}}
+\item \code{\link[rlang:is_logical]{is_logical()}}
 }
 }
 
@@ -328,7 +328,7 @@ On Linux and OS X additionally a, A, b, B, Om, p, r are available.
 \item \code{\link[stats:quantile]{quantile()}}: \code{probs} must be length 1;
 approximate quantile (t-digest) is computed
 \item \code{\link[stats:sd]{sd()}}
-\item \code{\link[stats:cor]{var()}}
+\item \code{\link[stats:var]{var()}}
 }
 }
 
@@ -346,22 +346,22 @@ Pattern modifiers \code{coll()} and \code{boundary()} are not supported in any f
 \item \code{\link[stringr:str_count]{str_count()}}: \code{pattern} must be a length 1 character vector
 \item \code{\link[stringr:str_detect]{str_detect()}}
 \item \code{\link[stringr:str_dup]{str_dup()}}
-\item \code{\link[stringr:str_starts]{str_ends()}}
-\item \code{\link[stringr:str_like]{str_ilike()}}
+\item \code{\link[stringr:str_ends]{str_ends()}}
+\item \code{\link[stringr:str_ilike]{str_ilike()}}
 \item \code{\link[stringr:str_length]{str_length()}}
 \item \code{\link[stringr:str_like]{str_like()}}
 \item \code{\link[stringr:str_pad]{str_pad()}}
 \item \code{\link[stringr:str_remove]{str_remove()}}
-\item \code{\link[stringr:str_remove]{str_remove_all()}}
+\item \code{\link[stringr:str_remove_all]{str_remove_all()}}
 \item \code{\link[stringr:str_replace]{str_replace()}}
-\item \code{\link[stringr:str_replace]{str_replace_all()}}
+\item \code{\link[stringr:str_replace_all]{str_replace_all()}}
 \item \code{\link[stringr:str_replace_na]{str_replace_na()}}
 \item \code{\link[stringr:str_split]{str_split()}}: Case-insensitive string splitting and splitting into 0 parts not supported
 \item \code{\link[stringr:str_starts]{str_starts()}}
 \item \code{\link[stringr:str_sub]{str_sub()}}: \code{start} and \code{end} must be length 1
-\item \code{\link[stringr:case]{str_to_lower()}}
-\item \code{\link[stringr:case]{str_to_title()}}
-\item \code{\link[stringr:case]{str_to_upper()}}
+\item \code{\link[stringr:str_to_lower]{str_to_lower()}}
+\item \code{\link[stringr:str_to_title]{str_to_title()}}
+\item \code{\link[stringr:str_to_upper]{str_to_upper()}}
 \item \code{\link[stringr:str_trim]{str_trim()}}
 }
 }
@@ -375,12 +375,12 @@ Pattern modifiers \code{coll()} and \code{boundary()} are not supported in any f
 \subsection{tidyselect}{
 \itemize{
 \item \code{\link[tidyselect:all_of]{all_of()}}
-\item \code{\link[tidyselect:starts_with]{contains()}}
-\item \code{\link[tidyselect:starts_with]{ends_with()}}
+\item \code{\link[tidyselect:contains]{contains()}}
+\item \code{\link[tidyselect:ends_with]{ends_with()}}
 \item \code{\link[tidyselect:everything]{everything()}}
-\item \code{\link[tidyselect:everything]{last_col()}}
-\item \code{\link[tidyselect:starts_with]{matches()}}
-\item \code{\link[tidyselect:starts_with]{num_range()}}
+\item \code{\link[tidyselect:last_col]{last_col()}}
+\item \code{\link[tidyselect:matches]{matches()}}
+\item \code{\link[tidyselect:num_range]{num_range()}}
 \item \code{\link[tidyselect:one_of]{one_of()}}
 \item \code{\link[tidyselect:starts_with]{starts_with()}}
 }
diff --git a/r/man/arrow-package.Rd b/r/man/arrow-package.Rd
index 69cef1bacccf..c96a0ddb84e3 100644
--- a/r/man/arrow-package.Rd
+++ b/r/man/arrow-package.Rd
@@ -22,6 +22,7 @@ Useful links:
 
 Authors:
 \itemize{
+  \item Jonathan Keane \email{jkeane@gmail.com}
   \item Neal Richardson \email{neal.p.richardson@gmail.com}
   \item Ian Cook \email{ianmcook@gmail.com}
   \item Nic Crane \email{thisisnic@gmail.com}
diff --git a/r/man/csv_convert_options.Rd b/r/man/csv_convert_options.Rd
index 58e685351b91..c61da51fb7ce 100644
--- a/r/man/csv_convert_options.Rd
+++ b/r/man/csv_convert_options.Rd
@@ -24,14 +24,14 @@ csv_convert_options(
 
 \item{null_values}{Character vector of recognized spellings for null values.
 Analogous to the \code{na.strings} argument to
-\code{\link[utils:read.table]{read.csv()}} or \code{na} in \code{\link[readr:read_delim]{readr::read_csv()}}.}
+\code{\link[utils:read.csv]{read.csv()}} or \code{na} in \code{\link[readr:read_csv]{readr::read_csv()}}.}
 
 \item{true_values}{Character vector of recognized spellings for \code{TRUE} values}
 
 \item{false_values}{Character vector of recognized spellings for \code{FALSE} values}
 
 \item{strings_can_be_null}{Logical: can string / binary columns have
-null values? Similar to the \code{quoted_na} argument to \code{\link[readr:read_delim]{readr::read_csv()}}}
+null values? Similar to the \code{quoted_na} argument to \code{\link[readr:read_csv]{readr::read_csv()}}}
 
 \item{col_types}{A \code{Schema} or \code{NULL} to infer types}
 
diff --git a/r/man/csv_read_options.Rd b/r/man/csv_read_options.Rd
index 8049403f26dc..cea245ff0ac9 100644
--- a/r/man/csv_read_options.Rd
+++ b/r/man/csv_read_options.Rd
@@ -35,9 +35,11 @@ be "f0", "f1", ..., "fN".}
 \item{skip_rows_after_names}{Number of lines to skip after the column names (default 0).
 This number can be larger than the number of rows in one block, and empty rows are counted.
 The order of application is as follows:
-- \code{skip_rows} is applied (if non-zero);
-- column names are read (unless \code{column_names} is set);
-- \code{skip_rows_after_names} is applied (if non-zero).}
+\itemize{
+\item \code{skip_rows} is applied (if non-zero);
+\item column names are read (unless \code{column_names} is set);
+\item \code{skip_rows_after_names} is applied (if non-zero).
+}}
 }
 \description{
 CSV Reading Options
diff --git a/r/man/enums.Rd b/r/man/enums.Rd
index 4088e7d843b8..6807ce662555 100644
--- a/r/man/enums.Rd
+++ b/r/man/enums.Rd
@@ -1,6 +1,5 @@
 % Generated by roxygen2: do not edit by hand
 % Please edit documentation in R/enums.R
-\docType{data}
 \name{enums}
 \alias{enums}
 \alias{TimeUnit}
@@ -19,37 +18,6 @@
 \alias{RoundMode}
 \alias{JoinType}
 \title{Arrow enums}
-\format{
-An object of class \code{TimeUnit::type} (inherits from \code{arrow-enum}) of length 4.
-
-An object of class \code{DateUnit} (inherits from \code{arrow-enum}) of length 2.
-
-An object of class \code{Type::type} (inherits from \code{arrow-enum}) of length 45.
-
-An object of class \code{StatusCode} (inherits from \code{arrow-enum}) of length 13.
-
-An object of class \code{FileMode} (inherits from \code{arrow-enum}) of length 3.
-
-An object of class \code{MessageType} (inherits from \code{arrow-enum}) of length 5.
-
-An object of class \code{Compression::type} (inherits from \code{arrow-enum}) of length 9.
-
-An object of class \code{FileType} (inherits from \code{arrow-enum}) of length 4.
-
-An object of class \code{ParquetVersionType} (inherits from \code{arrow-enum}) of length 3.
-
-An object of class \code{MetadataVersion} (inherits from \code{arrow-enum}) of length 5.
-
-An object of class \code{QuantileInterpolation} (inherits from \code{arrow-enum}) of length 5.
-
-An object of class \code{NullEncodingBehavior} (inherits from \code{arrow-enum}) of length 2.
-
-An object of class \code{NullHandlingBehavior} (inherits from \code{arrow-enum}) of length 3.
-
-An object of class \code{RoundMode} (inherits from \code{arrow-enum}) of length 10.
-
-An object of class \code{JoinType} (inherits from \code{arrow-enum}) of length 8.
-}
 \usage{
 TimeUnit
 
@@ -84,5 +52,4 @@ JoinType
 \description{
 Arrow enums
 }
-\keyword{datasets}
 \keyword{internal}
diff --git a/r/man/read_json_arrow.Rd b/r/man/read_json_arrow.Rd
index abf6b8fc44a8..b809a63bcc6f 100644
--- a/r/man/read_json_arrow.Rd
+++ b/r/man/read_json_arrow.Rd
@@ -54,7 +54,7 @@ If \code{schema} is not provided, Arrow data types are inferred from the data:
 \item JSON numbers convert to \code{\link[=int64]{int64()}}, falling back to \code{\link[=float64]{float64()}} if a non-integer is encountered.
 \item JSON strings of the kind "YYYY-MM-DD" and "YYYY-MM-DD hh:mm:ss" convert to \code{\link[=timestamp]{timestamp(unit = "s")}},
 falling back to \code{\link[=utf8]{utf8()}} if a conversion error occurs.
-\item JSON arrays convert to a \code{\link[vctrs:list_of]{vctrs::list_of()}} type, and inference proceeds recursively on the JSON arrays' values.
+\item JSON arrays convert to a \code{\link[=list_of]{list_of()}} type, and inference proceeds recursively on the JSON arrays' values.
 \item Nested JSON objects convert to a \code{\link[=struct]{struct()}} type, and inference proceeds recursively on the JSON objects' values.
 }
 
diff --git a/r/man/reexports.Rd b/r/man/reexports.Rd
index 591158c72f4c..756df2d884c3 100644
--- a/r/man/reexports.Rd
+++ b/r/man/reexports.Rd
@@ -22,8 +22,8 @@ These objects are imported from other packages. Follow the links
 below to see their documentation.
 
 \describe{
-  \item{bit64}{\code{\link[bit64:bit64-package]{print.integer64}}, \code{\link[bit64:bit64-package]{str.integer64}}}
+  \item{bit64}{\code{\link[bit64:print.integer64]{print.integer64()}}, \code{\link[bit64:str.integer64]{str.integer64()}}}
 
-  \item{tidyselect}{\code{\link[tidyselect]{all_of}}, \code{\link[tidyselect:starts_with]{contains}}, \code{\link[tidyselect:starts_with]{ends_with}}, \code{\link[tidyselect]{everything}}, \code{\link[tidyselect:everything]{last_col}}, \code{\link[tidyselect:starts_with]{matches}}, \code{\link[tidyselect:starts_with]{num_range}}, \code{\link[tidyselect]{one_of}}, \code{\link[tidyselect]{starts_with}}}
+  \item{tidyselect}{\code{\link[tidyselect:all_of]{all_of()}}, \code{\link[tidyselect:contains]{contains()}}, \code{\link[tidyselect:ends_with]{ends_with()}}, \code{\link[tidyselect:everything]{everything()}}, \code{\link[tidyselect:last_col]{last_col()}}, \code{\link[tidyselect:matches]{matches()}}, \code{\link[tidyselect:num_range]{num_range()}}, \code{\link[tidyselect:one_of]{one_of()}}, \code{\link[tidyselect:starts_with]{starts_with()}}}
 }}
 
diff --git a/r/man/schema.Rd b/r/man/schema.Rd
index ff77a05d84aa..65ab2eea0d27 100644
--- a/r/man/schema.Rd
+++ b/r/man/schema.Rd
@@ -7,7 +7,7 @@
 schema(...)
 }
 \arguments{
-\item{...}{\link[vctrs:fields]{fields}, field name/\link[=data-type]{data type} pairs (or a list of), or object from which to extract
+\item{...}{\link[=field]{fields}, field name/\link[=data-type]{data type} pairs (or a list of), or object from which to extract
 a schema}
 }
 \description{
diff --git a/r/man/vctrs_extension_array.Rd b/r/man/vctrs_extension_array.Rd
index a3b9d902a1fb..6fb1b333277f 100644
--- a/r/man/vctrs_extension_array.Rd
+++ b/r/man/vctrs_extension_array.Rd
@@ -10,7 +10,7 @@ vctrs_extension_array(x, ptype = vctrs::vec_ptype(x), storage_type = NULL)
 vctrs_extension_type(x, storage_type = infer_type(vctrs::vec_data(x)))
 }
 \arguments{
-\item{x}{A vctr (i.e., \code{\link[vctrs:vec_assert]{vctrs::vec_is()}} returns \code{TRUE}).}
+\item{x}{A vctr (i.e., \code{\link[vctrs:vec_is]{vctrs::vec_is()}} returns \code{TRUE}).}
 
 \item{ptype}{A \code{\link[vctrs:vec_ptype]{vctrs::vec_ptype()}}, which is usually a zero-length
 version of the object with the appropriate attributes set. This value
@@ -33,7 +33,7 @@ Most common R vector types are converted automatically to a suitable
 Arrow \link[=data-type]{data type} without the need for an extension type. For
 vector types whose conversion is not suitably handled by default, you can
 create a \code{\link[=vctrs_extension_array]{vctrs_extension_array()}}, which passes \code{\link[vctrs:vec_data]{vctrs::vec_data()}} to
-\code{Array$create()} and calls \code{\link[vctrs:vec_proxy]{vctrs::vec_restore()}} when the \link{Array} is
+\code{Array$create()} and calls \code{\link[vctrs:vec_restore]{vctrs::vec_restore()}} when the \link{Array} is
 converted back into an R vector.
 }
 \examples{
diff --git a/r/src/r_to_arrow.cpp b/r/src/r_to_arrow.cpp
index 20f45e00361b..9ce20b559d0f 100644
--- a/r/src/r_to_arrow.cpp
+++ b/r/src/r_to_arrow.cpp
@@ -929,8 +929,8 @@ class RPrimitiveConverter<T, enable_if_string_view<T>>
 
  private:
   Status UnsafeAppendUtf8Strings(const cpp11::strings& s, int64_t size, int64_t offset) {
-    RETURN_NOT_OK(this->primitive_builder_->Reserve(s.size()));
-    const SEXP* p_strings = reinterpret_cast<const SEXP*>(DATAPTR_RO(s));
+    RETURN_NOT_OK(this->primitive_builder_->Reserve(size - offset));
+    const SEXP* p_strings = reinterpret_cast<const SEXP*>(DATAPTR_RO(s)) + offset;
 
     int64_t total_length = 0;
     for (R_xlen_t i = offset; i < size; i++, ++p_strings) {
@@ -939,7 +939,7 @@ class RPrimitiveConverter<T, enable_if_string_view<T>>
     }
     RETURN_NOT_OK(this->primitive_builder_->ReserveData(total_length));
 
-    p_strings = reinterpret_cast<const SEXP*>(DATAPTR_RO(s));
+    p_strings = reinterpret_cast<const SEXP*>(DATAPTR_RO(s)) + offset;
     for (R_xlen_t i = offset; i < size; i++, ++p_strings) {
       SEXP si = *p_strings;
       if (si == NA_STRING) {

From 9f59d40a2cf61bf5078081a4316e7839e87f9c1d Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Tue, 5 May 2026 13:07:23 -0400
Subject: [PATCH 7/9] Auto-update r/DESCRIPTION roxygen2 version field

---
 r/DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/r/DESCRIPTION b/r/DESCRIPTION
index c76dfc5572fe..b21b12cba7bd 100644
--- a/r/DESCRIPTION
+++ b/r/DESCRIPTION
@@ -44,7 +44,6 @@ Imports:
     utils,
     vctrs
 Roxygen: list(markdown = TRUE, r6 = FALSE, load = "source")
-RoxygenNote: 7.3.3
 Config/testthat/edition: 3
 Config/build/bootstrap: TRUE
 Suggests:
@@ -152,3 +151,4 @@ Collate:
     'schema.R'
     'udf.R'
     'util.R'
+Config/roxygen2/version: 8.0.0

From 8925a6b8c848676670a3b52ab28e2088cc8bfe10 Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Tue, 5 May 2026 13:21:37 -0400
Subject: [PATCH 8/9] Fix DictionaryBuilder<StringViewType> Append signature in
 Python bindings

---
 python/pyarrow/src/arrow/python/python_to_arrow.cc | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc
index e7ce54abcd8f..16b91ce7f251 100644
--- a/python/pyarrow/src/arrow/python/python_to_arrow.cc
+++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc
@@ -826,7 +826,8 @@ class PyDictionaryConverter<U, enable_if_has_string_view<U>>
     } else {
       ARROW_RETURN_NOT_OK(
           PyValue::Convert(this->value_type_, this->options_, value, view_));
-      return this->value_builder_->Append(view_.bytes, static_cast<int32_t>(view_.size));
+      return this->value_builder_->Append(reinterpret_cast<const uint8_t*>(view_.bytes),
+                                          static_cast<int32_t>(view_.size));
     }
   }
 

From de59365eb6edf83d28341621a58e224c52e2d04f Mon Sep 17 00:00:00 2001
From: Nic Crane <thisisnic@gmail.com>
Date: Tue, 5 May 2026 14:18:31 -0400
Subject: [PATCH 9/9] Fix DictionaryBuilder::Append for StringViewType in
 Python bindings

---
 python/pyarrow/src/arrow/python/python_to_arrow.cc | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/python/pyarrow/src/arrow/python/python_to_arrow.cc b/python/pyarrow/src/arrow/python/python_to_arrow.cc
index 16b91ce7f251..8c92918cf30a 100644
--- a/python/pyarrow/src/arrow/python/python_to_arrow.cc
+++ b/python/pyarrow/src/arrow/python/python_to_arrow.cc
@@ -826,8 +826,8 @@ class PyDictionaryConverter<U, enable_if_has_string_view<U>>
     } else {
       ARROW_RETURN_NOT_OK(
           PyValue::Convert(this->value_type_, this->options_, value, view_));
-      return this->value_builder_->Append(reinterpret_cast<const uint8_t*>(view_.bytes),
-                                          static_cast<int32_t>(view_.size));
+      return this->value_builder_->Append(
+          std::string_view(view_.bytes, static_cast<size_t>(view_.size)));
     }
   }