Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: stt.api
Title: 'OpenAI' Compatible Speech-to-Text API Client
Version: 0.2.1
Version: 0.2.1.1
Authors@R: c(
person("Troy", "Hernandez", email = "troy@cornball.ai", role = c("aut", "cre"),
comment = c(ORCID = "0009-0005-4248-604X")),
Expand Down
23 changes: 22 additions & 1 deletion R/stt.R
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
#' \item{backend}{Which backend was used ("api" or "whisper").}
#' \item{raw}{The raw response from the backend.}
#' }
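#' The result also carries a \code{"call_record"} attribute (cornball_sidecar
#' v1, as in xtx.api/tts.api): a list holding the package name and version,
#' the calling function's name, the resolved request (non-NULL arguments
#' only), the elapsed time in seconds, and a creation timestamp -- provenance
#' that travels with the transcription when callers serialize it.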
#'
#' @examples
#' \dontrun{
Expand Down Expand Up @@ -57,7 +61,8 @@ stt <- function(file, model = NULL, language = NULL,
resolved_backend <- .choose_backend(backend)

# Dispatch to appropriate backend
if (resolved_backend == "openai") {
started <- Sys.time()
res <- if (resolved_backend == "openai") {
.via_api(
file = file,
model = model,
Expand All @@ -68,5 +73,21 @@ stt <- function(file, model = NULL, language = NULL,
} else {
.via_whisper(file = file, model = model, language = language)
}
# stt produces an R object, not a media file, so the call record rides as
# an attribute (cornball_sidecar v1, as in xtx.api/tts.api); callers that
# serialize the result keep its provenance with it.
attr(res, "call_record") <- list(
cornball_sidecar = 1L, package = "stt.api",
version = as.character(utils::packageVersion("stt.api")),
fn = "stt",
request = Filter(Negate(is.null),
list(file = file, model = model,
language = language,
response_format = response_format,
backend = resolved_backend, prompt = prompt)),
elapsed = round(as.numeric(difftime(Sys.time(), started,
units = "secs")), 2),
created = format(Sys.time(), "%Y-%m-%dT%H:%M:%S%z"))
res
}

4 changes: 4 additions & 0 deletions man/stt.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,10 @@ A list with components:
\item{backend}{Which backend was used ("api" or "whisper").}
\item{raw}{The raw response from the backend.}
}
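The result also carries a \code{"call_record"} attribute (cornball_sidecar
v1, as in xtx.api/tts.api): a list holding the package name and version,
the calling function's name, the resolved request (non-NULL arguments
only), the elapsed time in seconds, and a creation timestamp -- provenance
that travels with the transcription when callers serialize it.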
}
\description{
Convert an audio file to text using a local whisper backend or
Expand Down