Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,6 @@ Imports:
jsonlite
Suggests:
audio.whisper,
fal.api,
gpu.ctl,
processx,
tinytest,
Expand Down
27 changes: 2 additions & 25 deletions R/internal_backend.R
Original file line number Diff line number Diff line change
Expand Up @@ -72,13 +72,8 @@
segments
}

# Check if fal.api is available
.has_fal <- function() {
requireNamespace("fal.api", quietly = TRUE)
}

# Choose backend based on availability and user preference
.choose_backend <- function(backend = c("auto", "whisper", "audio.whisper", "openai", "fal")) {
.choose_backend <- function(backend = c("auto", "whisper", "audio.whisper", "openai")) {
backend <- match.arg(backend)

if (backend == "openai") {
Expand Down Expand Up @@ -117,18 +112,6 @@
return("audio.whisper")
}

if (backend == "fal") {
# Explicit fal.api request - verify it's available
if (!.has_fal()) {
stop(
"Backend 'fal' requested but fal.api package is not installed.\n",
"Install with: remotes::install_github('cornball-ai/fal.api')",
call. = FALSE
)
}
return("fal")
}

# Auto mode: try backends in priority order
# 1. Native whisper (fastest, no external dependencies)
if (.has_whisper()) {
Expand All @@ -145,18 +128,12 @@
return("openai")
}

# 4. fal.api (cloud fallback)
if (.has_fal()) {
return("fal")
}

stop(
"No transcription backend available.\n",
"Either:\n",
" - Install whisper: remotes::install_github('cornball-ai/whisper'), or\n",
" - Install audio.whisper: install.packages('audio.whisper', repos = 'https://bnosac.github.io/drat'), or\n",
" - Set an API endpoint with set_stt_base(), or\n",
" - Install fal.api: remotes::install_github('cornball-ai/fal.api')",
" - Set an API endpoint with set_stt_base()",
call. = FALSE
)
}
Expand Down
59 changes: 0 additions & 59 deletions R/internal_fal.R

This file was deleted.

6 changes: 3 additions & 3 deletions R/internal_whisper.R
Original file line number Diff line number Diff line change
Expand Up @@ -120,9 +120,9 @@ clear_native_whisper_cache <- function() {
raw = result
)

# Pass through word-level tokens if available
if (!is.null(result$words) && nrow(result$words) > 0 && !is.null(segments)) {
out$tokens <- subtitles::words_to_tokens(result$words, segments)
# Pass through word-level timestamps if available
if (!is.null(result$words) && nrow(result$words) > 0) {
out$words <- result$words
}

out
Expand Down
12 changes: 3 additions & 9 deletions R/stt.R
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
#' @param response_format Response format for API backend. One of "text",
#' "json", or "verbose_json". Ignored for audio.whisper backend.
#' @param backend Which backend to use: "auto" (default), "whisper",
#' "audio.whisper", "openai", or "fal". Auto mode tries native whisper first,
#' then audio.whisper, then openai API (if configured), then fal.api.
#' "audio.whisper", or "openai". Auto mode tries native whisper first,
#' then audio.whisper, then openai API (if configured).
#'
#' @return A list with components:
#' \describe{
Expand Down Expand Up @@ -52,7 +52,7 @@ stt <- function(
model = NULL,
language = NULL,
response_format = c("json", "text", "verbose_json"),
backend = c("auto", "whisper", "audio.whisper", "openai", "fal"),
backend = c("auto", "whisper", "audio.whisper", "openai"),
prompt = NULL
) {

Expand Down Expand Up @@ -88,12 +88,6 @@ stt <- function(
model = model,
language = language
)
} else if (resolved_backend == "fal") {
.via_fal(
file = file,
model = model,
language = language
)
} else {
# audio.whisper backend
.via_audio_whisper(
Expand Down
1 change: 0 additions & 1 deletion cran-comments.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,5 @@
The NOTE about "Suggests or Enhances not in mainstream repositories" refers to:
- `audio.whisper`: Available on GitHub (bnosac/audio.whisper)
- `gpu.ctl`: Internal package for GPU resource management
- `whisper`: Available on GitHub (cornball-ai/whisper), submitted to CRAN

These are optional backends and the package works without them.
7 changes: 1 addition & 6 deletions man/dot-via_audio_whisper.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -3,12 +3,7 @@
\alias{.via_audio_whisper}
\title{Internal: Transcribe via audio.whisper package}
\usage{
.via_audio_whisper(
file,
model = NULL,
language = NULL,
token_timestamps = FALSE
)
.via_audio_whisper(file, model = NULL, language = NULL, token_timestamps = FALSE)
}
\arguments{
\item{file}{Character. Path to the audio file to transcribe.}
Expand Down
15 changes: 5 additions & 10 deletions man/stt.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,9 @@
\alias{stt}
\title{Speech to Text}
\usage{
stt(
file,
model = NULL,
language = NULL,
response_format = c("json", "text", "verbose_json"),
backend = c("auto", "whisper", "audio.whisper", "openai", "fal"),
prompt = NULL
)
stt(file, model = NULL, language = NULL,
response_format = c("json", "text", "verbose_json"),
backend = c("auto", "whisper", "audio.whisper", "openai"), prompt = NULL)
}
\arguments{
\item{file}{Path to the audio file to convert.}
Expand All @@ -27,8 +22,8 @@ to improve transcription accuracy.}
"json", or "verbose_json". Ignored for audio.whisper backend.}

\item{backend}{Which backend to use: "auto" (default), "whisper",
"audio.whisper", "openai", or "fal". Auto mode tries native whisper first,
then audio.whisper, then openai API (if configured), then fal.api.}
"audio.whisper", or "openai". Auto mode tries native whisper first,
then audio.whisper, then openai API (if configured).}

\item{prompt}{Optional text to guide the transcription. For API backend,
this is passed as initial_prompt to help with spelling of names,
Expand Down