diff --git a/r/R/ipc-stream.R b/r/R/ipc-stream.R index 26a61a790f93..8ebb5e36636e 100644 --- a/r/R/ipc-stream.R +++ b/r/R/ipc-stream.R @@ -95,6 +95,10 @@ write_to_raw <- function(x, format = c("stream", "file")) { #' Arrow [Table] otherwise #' @seealso [write_feather()] for writing IPC files. [RecordBatchReader] for a #' lower-level interface. +#' @section Untrusted data: +#' If reading from an untrusted source, you can validate the data by reading +#' with `as_data_frame = FALSE` and calling `$ValidateFull()` on the Table +#' before processing. #' @export read_ipc_stream <- function(file, as_data_frame = TRUE, ...) { if (!inherits(file, "InputStream")) { diff --git a/r/man/DictionaryType.Rd b/r/man/DictionaryType.Rd index 8c9087f1ab68..cda27978b1b4 100644 --- a/r/man/DictionaryType.Rd +++ b/r/man/DictionaryType.Rd @@ -3,13 +3,40 @@ \docType{class} \name{DictionaryType} \alias{DictionaryType} -\title{class DictionaryType} +\title{DictionaryType class} \description{ -class DictionaryType +\code{DictionaryType} is a \link{FixedWidthType} that represents dictionary-encoded data. +Dictionary encoding stores unique values in a dictionary and uses integer-type +indices to reference them, which can be more memory-efficient for data with many +repeated values. } -\section{Methods}{ +\section{R6 Methods}{ +\itemize{ +\item \verb{$ToString()}: Return a string representation of the dictionary type +\item \verb{$code(namespace = FALSE)}: Return R code to create this dictionary type +} +} + +\section{Active bindings}{ -TODO +\itemize{ +\item \verb{$index_type}: The \link{DataType} for the dictionary indices (must be an integer type, +signed or unsigned) +\item \verb{$value_type}: The \link{DataType} for the dictionary values +\item \verb{$name}: The name of the type. +\item \verb{$ordered}: Whether the dictionary is ordered. +} +} + +\section{Factory}{ + + +\code{DictionaryType$create()} takes the following arguments: +\itemize{ +\item \code{index_type}: A \link{DataType} for the indices (default \code{\link[=int32]{int32()}}) +\item \code{value_type}: A \link{DataType} for the values (default \code{\link[=utf8]{utf8()}}) +\item \code{ordered}: Is this an ordered dictionary (default \code{FALSE})? +} } diff --git a/r/man/FixedWidthType.Rd b/r/man/FixedWidthType.Rd index ac6723d79dbb..71d0ab2d2766 100644 --- a/r/man/FixedWidthType.Rd +++ b/r/man/FixedWidthType.Rd @@ -5,11 +5,22 @@ \alias{FixedWidthType} \title{FixedWidthType class} \description{ -FixedWidthType class +\code{FixedWidthType} is a base class for data types with a fixed width in bits. +This includes all integer types, floating-point types, \code{Boolean}, +\code{FixedSizeBinary}, temporal types (dates, times, timestamps, durations), +and decimal types. } -\section{Methods}{ +\section{R6 Methods}{ -TODO +\code{FixedWidthType} inherits from \link{DataType}, so it has the same methods. } +\section{Active bindings}{ + +\itemize{ +\item \verb{$bit_width}: The width of the type in bits +} +} + +\keyword{internal} diff --git a/r/man/Message.Rd b/r/man/Message.Rd index fbad235b64fe..b8be82bfa4bb 100644 --- a/r/man/Message.Rd +++ b/r/man/Message.Rd @@ -5,11 +5,24 @@ \alias{Message} \title{Message class} \description{ -Message class +\code{Message} holds an Arrow IPC message, which includes metadata and +an optional message body. } -\section{Methods}{ +\section{R6 Methods}{ +\itemize{ +\item \verb{$Equals(other)}: Check if this \code{Message} is equal to another \code{Message} +\item \verb{$body_length()}: Return the length of the message body in bytes +\item \verb{$Verify()}: Check if the \code{Message} metadata is valid Flatbuffer format +} +} -TODO +\section{Active bindings}{ + +\itemize{ +\item \verb{$type}: The message type +\item \verb{$metadata}: The message metadata +\item \verb{$body}: The message body as a \link{Buffer} +} } diff --git a/r/man/MessageReader.Rd b/r/man/MessageReader.Rd index 32ca8900b33a..4c3bef3fc9f4 100644 --- a/r/man/MessageReader.Rd +++ b/r/man/MessageReader.Rd @@ -5,11 +5,22 @@ \alias{MessageReader} \title{MessageReader class} \description{ -MessageReader class +\code{MessageReader} reads \code{Message} objects from an input stream. } -\section{Methods}{ +\section{R6 Methods}{ +\itemize{ +\item \verb{$ReadNextMessage()}: Read the next \code{Message} from the stream. Returns \code{NULL} if +there are no more messages. +} +} + +\section{Factory}{ -TODO + +\code{MessageReader$create()} takes the following argument: +\itemize{ +\item \code{stream}: An \link{InputStream} or object coercible to one (e.g., a raw vector) +} } diff --git a/r/man/read_ipc_stream.Rd b/r/man/read_ipc_stream.Rd index 49d3949bfcf2..601edb2af068 100644 --- a/r/man/read_ipc_stream.Rd +++ b/r/man/read_ipc_stream.Rd @@ -27,6 +27,13 @@ Apache Arrow defines two formats for \href{https://arrow.apache.org/docs/format/ a "stream" format and a "file" format, known as Feather. \code{read_ipc_stream()} and \code{\link[=read_feather]{read_feather()}} read those formats, respectively. } +\section{Untrusted data}{ + +If reading from an untrusted source, you can validate the data by reading +with \code{as_data_frame = FALSE} and calling \verb{$ValidateFull()} on the Table +before processing. +} + \seealso{ \code{\link[=write_feather]{write_feather()}} for writing IPC files. \link{RecordBatchReader} for a lower-level interface.