diff --git a/DESCRIPTION b/DESCRIPTION index 87cd55b..b67d533 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: stt.api Title: 'OpenAI' Compatible Speech-to-Text API Client -Version: 0.1.0 +Version: 0.2.0 Authors@R: person("Troy", "Hernandez", email = "troy@cornball.ai", role = c("aut", "cre")) Description: A minimal-dependency R client for 'OpenAI'-compatible speech-to-text @@ -16,4 +16,3 @@ Imports: Suggests: tinytest, whisper -RoxygenNote: 7.3.3 diff --git a/NEWS.md b/NEWS.md index 8c8accd..583b617 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,6 +1,13 @@ -# sttapi 0.1.0 +# stt.api 0.2.0 -* Initial CRAN release +* Remove audio.whisper backend +* Remove gpu.ctl integration +* Remove processx dependency (never implemented) +* Backends are now: whisper (native R torch) and OpenAI-compatible API + +# stt.api 0.1.0 + +* Initial release * Support for OpenAI-compatible speech-to-text APIs * Local server support (LM Studio, OpenWebUI, Whisper containers) * Optional whisper package integration for local transcription diff --git a/R/internal_api.R b/R/internal_api.R index 7906e93..fc1b63a 100644 --- a/R/internal_api.R +++ b/R/internal_api.R @@ -1,133 +1,122 @@ # Internal: Transcribe via OpenAI-compatible API -.via_api <- function( - file, - model = NULL, - language = NULL, - response_format = "json", - prompt = NULL -) { - - base_url <- .get_api_base(required = TRUE) - api_key <- .get_api_key() - timeout <- .get_timeout() - - # Build endpoint URL - - url <- paste0(base_url, "/v1/audio/transcriptions") - - # Prepare multipart form data - form_data <- list( - file = curl::form_file(file) - ) - - if (!is.null(model)) { - form_data$model <- model - } - - if (!is.null(language)) { - form_data$language <- language - } - - if (!is.null(prompt)) { - form_data$prompt <- prompt - } - - form_data$response_format <- response_format - - # Build headers (curl expects "Name: Value" format) - headers <- "Accept: application/json" - if (!is.null(api_key) && nchar(api_key) > 0) { - headers <- c(headers, paste0("Authorization: Bearer ", api_key)) - } - - # Create curl handle - h <- curl::new_handle() - curl::handle_setopt(h, - timeout = timeout, - httpheader = headers - ) - curl::handle_setform(h, .list = form_data) - - # Make request - response <- tryCatch( - curl::curl_fetch_memory(url, handle = h), - error = function(e) { - stop( - "API request failed: ", conditionMessage(e), "\n", - "URL: ", url, - call. = FALSE - ) +.via_api <- function(file, model = NULL, language = NULL, + response_format = "json", prompt = NULL) { + base_url <- .get_api_base(required = TRUE) + api_key <- .get_api_key() + timeout <- .get_timeout() + + # Build endpoint URL + + url <- paste0(base_url, "/v1/audio/transcriptions") + + # Prepare multipart form data + form_data <- list(file = curl::form_file(file)) + + if (!is.null(model)) { + form_data$model <- model } - ) - # Check HTTP status - if (response$status_code >= 400) { - body <- rawToChar(response$content) - error_msg <- tryCatch( - { - parsed <- jsonlite::fromJSON(body, simplifyVector = FALSE) - if (!is.null(parsed$error$message)) { - parsed$error$message - } else { - body - } - }, - error = function(e) body - ) - stop( - "API error (HTTP ", response$status_code, "): ", error_msg, - call. = FALSE + if (!is.null(language)) { + form_data$language <- language + } + + if (!is.null(prompt)) { + form_data$prompt <- prompt + } + + form_data$response_format <- response_format + + # Build headers (curl expects "Name: Value" format) + headers <- "Accept: application/json" + if (!is.null(api_key) && nchar(api_key) > 0) { + headers <- c(headers, paste0("Authorization: Bearer ", api_key)) + } + + # Create curl handle + h <- curl::new_handle() + curl::handle_setopt(h, timeout = timeout, httpheader = headers) + curl::handle_setform(h, .list = form_data) + + # Make request + response <- tryCatch( + curl::curl_fetch_memory(url, handle = h), + error = function(e) { + stop( + "API request failed: ", conditionMessage(e), "\n", + "URL: ", url, + call. = FALSE + ) + } ) - } - - # Parse response - body <- rawToChar(response$content) - - if (response_format == "text") { - return(list( - text = body, - segments = NULL, - language = language, - backend = "api", - raw = body - )) - } - - # Parse JSON response - parsed <- tryCatch( - jsonlite::fromJSON(body, simplifyVector = FALSE), - error = function(e) { - stop("Failed to parse API response as JSON: ", conditionMessage(e), - call. = FALSE) + + # Check HTTP status + if (response$status_code >= 400) { + body <- rawToChar(response$content) + error_msg <- tryCatch( + { + parsed <- jsonlite::fromJSON(body, simplifyVector = FALSE) + if (!is.null(parsed$error$message)) { + parsed$error$message + } else { + body + } + }, + error = function(e) body + ) + stop( + "API error (HTTP ", response$status_code, "): ", error_msg, + call. = FALSE + ) + } + + # Parse response + body <- rawToChar(response$content) + + if (response_format == "text") { + return(list( + text = body, + segments = NULL, + language = language, + backend = "api", + raw = body + )) } - ) - - # Extract segments if available (verbose_json format) - segments <- NULL - if (!is.null(parsed$segments) && length(parsed$segments) > 0) { - segments <- tryCatch( - { - do.call(rbind, lapply(parsed$segments, function(s) { - data.frame( - start = s$start, - end = s$end, - text = s$text, - stringsAsFactors = FALSE - ) + + # Parse JSON response + parsed <- tryCatch( + jsonlite::fromJSON(body, simplifyVector = FALSE), + error = function(e) { + stop("Failed to parse API response as JSON: ", conditionMessage(e), + call. = FALSE) + } + ) + + # Extract segments if available (verbose_json format) + segments <- NULL + if (!is.null(parsed$segments) && length(parsed$segments) > 0) { + segments <- tryCatch( + { + do.call(rbind, lapply(parsed$segments, function(s) { + data.frame( + start = s$start, + end = s$end, + text = s$text, + stringsAsFactors = FALSE + ) })) - }, - error = function(e) NULL + }, + error = function(e) NULL + ) + # Normalize to numeric seconds + segments <- .normalize_segments(segments) + } + + list( + text = parsed$text %||% "", + segments = segments, + language = parsed$language %||% language, + backend = "api", + raw = parsed ) - # Normalize to numeric seconds - segments <- .normalize_segments(segments) - } - - list( - text = parsed$text %||% "", - segments = segments, - language = parsed$language %||% language, - backend = "api", - raw = parsed - ) } diff --git a/R/internal_backend.R b/R/internal_backend.R index 4a6eaab..bcf28bb 100644 --- a/R/internal_backend.R +++ b/R/internal_backend.R @@ -1,25 +1,24 @@ # Internal helper to get API base URL .get_api_base <- function(required = FALSE) { - base <- getOption("stt.api_base") - if (required && is.null(base)) { - stop( - "API base URL not set.\n", - "Use set_stt_base() to configure the endpoint.", - call. = FALSE - ) - } - base + base <- getOption("stt.api_base") + if (required && is.null(base)) { + stop( + "API base URL not set.\n", + "Use set_stt_base() to configure the endpoint.", + call. = FALSE + ) + } + base } # Internal helper to get API key .get_api_key <- function() { - getOption("stt.api_key") + getOption("stt.api_key") } # Internal helper to get timeout .get_timeout <- function() { - - getOption("stt.timeout", default = 60) + getOption("stt.timeout", default = 60) } #' Convert time string to numeric seconds @@ -27,17 +26,21 @@ #' @return Numeric seconds #' @keywords internal .time_to_seconds <- function(time_str) { - if (is.numeric(time_str)) return(time_str) - if (is.na(time_str) || is.null(time_str)) return(NA_real_) + if (is.numeric(time_str)) { + return(time_str) + } + if (is.na(time_str) || is.null(time_str)) { + return(NA_real_) + } - parts <- strsplit(as.character(time_str), ":") [[1]] - if (length(parts) == 3) { - as.numeric(parts[1]) * 3600 + as.numeric(parts[2]) * 60 + as.numeric(parts[3]) - } else if (length(parts) == 2) { - as.numeric(parts[1]) * 60 + as.numeric(parts[2]) - } else { - as.numeric(parts[1]) - } + parts <- strsplit(as.character(time_str), ":")[[1]] + if (length(parts) == 3) { + as.numeric(parts[1]) * 3600 + as.numeric(parts[2]) * 60 + as.numeric(parts[3]) + } else if (length(parts) == 2) { + as.numeric(parts[1]) * 60 + as.numeric(parts[2]) + } else { + as.numeric(parts[1]) + } } #' Normalize segments to use numeric seconds @@ -45,70 +48,72 @@ #' @return Data frame with numeric start/end columns #' @keywords internal .normalize_segments <- function(segments) { - if (is.null(segments) || nrow(segments) == 0) return(segments) + if (is.null(segments) || nrow(segments) == 0) { + return(segments) + } - # Standardize column names to start/end - if ("from" %in% names(segments) && !"start" %in% names(segments)) { - segments$start <- segments$from - } - if ("to" %in% names(segments) && !"end" %in% names(segments)) { - segments$end <- segments$to - } + # Standardize column names to start/end + if ("from" %in% names(segments) && !"start" %in% names(segments)) { + segments$start <- segments$from + } + if ("to" %in% names(segments) && !"end" %in% names(segments)) { + segments$end <- segments$to + } - # Convert to numeric seconds if needed - if ("start" %in% names(segments) && !is.numeric(segments$start)) { - segments$start <- sapply(segments$start, .time_to_seconds) - } - if ("end" %in% names(segments) && !is.numeric(segments$end)) { - segments$end <- sapply(segments$end, .time_to_seconds) - } + # Convert to numeric seconds if needed + if ("start" %in% names(segments) && !is.numeric(segments$start)) { + segments$start <- sapply(segments$start, .time_to_seconds) + } + if ("end" %in% names(segments) && !is.numeric(segments$end)) { + segments$end <- sapply(segments$end, .time_to_seconds) + } - segments + segments } # Choose backend based on availability and user preference .choose_backend <- function(backend = c("auto", "whisper", "openai")) { - backend <- match.arg(backend) - - if (backend == "openai") { - if (is.null(.get_api_base())) { - stop( - "Backend 'openai' requested but no API base URL is set.\n", - "Use set_stt_base() to configure the endpoint.", - call. = FALSE - ) + backend <- match.arg(backend) + + if (backend == "openai") { + if (is.null(.get_api_base())) { + stop( + "Backend 'openai' requested but no API base URL is set.\n", + "Use set_stt_base() to configure the endpoint.", + call. = FALSE + ) + } + return("openai") } - return("openai") - } - if (backend == "whisper") { - if (!.has_whisper()) { - stop( - "Backend 'whisper' requested but package is not installed.\n", - "Install with: install.packages('whisper')", - call. = FALSE - ) + if (backend == "whisper") { + if (!.has_whisper()) { + stop( + "Backend 'whisper' requested but package is not installed.\n", + "Install with: install.packages('whisper')", + call. = FALSE + ) + } + return("whisper") } - return("whisper") - } - # Auto mode: try backends in priority order - # 1. Native whisper (fastest, no external dependencies) - if (.has_whisper()) { - return("whisper") - } + # Auto mode: try backends in priority order + # 1. Native whisper (fastest, no external dependencies) + if (.has_whisper()) { + return("whisper") + } - # 2. OpenAI-compatible API (if configured) - if (!is.null(.get_api_base())) { - return("openai") - } + # 2. OpenAI-compatible API (if configured) + if (!is.null(.get_api_base())) { + return("openai") + } - stop( - "No transcription backend available.\n", - "Either:\n", - " - Install whisper: install.packages('whisper'), or\n", - " - Set an API endpoint with set_stt_base()", - call. = FALSE - ) + stop( + "No transcription backend available.\n", + "Either:\n", + " - Install whisper: install.packages('whisper'), or\n", + " - Set an API endpoint with set_stt_base()", + call. = FALSE + ) } diff --git a/R/internal_whisper.R b/R/internal_whisper.R index 5d562bb..74a5151 100644 --- a/R/internal_whisper.R +++ b/R/internal_whisper.R @@ -5,7 +5,7 @@ # Check if native whisper package is available .has_whisper <- function() { - requireNamespace("whisper", quietly = TRUE) + requireNamespace("whisper", quietly = TRUE) } #' Get or create cached native whisper model @@ -13,25 +13,22 @@ #' @param device Device to use ("auto", "cpu", "cuda") #' @return Loaded whisper model object #' @keywords internal -.get_native_whisper_model <- function( - model, - device = "auto" -) { - cache_key <- paste(model, device, sep = "_") - if (is.null(.native_whisper_cache[[cache_key]])) { - message("Loading native whisper model: ", model, "...") - .native_whisper_cache[[cache_key]] <- tryCatch( - whisper::load_whisper_model(model, device = device), - error = function(e) { - stop( - "Failed to load whisper model '", model, "': ", conditionMessage(e), - call. = FALSE +.get_native_whisper_model <- function(model, device = "auto") { + cache_key <- paste(model, device, sep = "_") + if (is.null(.native_whisper_cache[[cache_key]])) { + message("Loading native whisper model: ", model, "...") + .native_whisper_cache[[cache_key]] <- tryCatch( + whisper::load_whisper_model(model, device = device), + error = function(e) { + stop( + "Failed to load whisper model '", model, "': ", conditionMessage(e), + call. = FALSE + ) + } ) - } - ) - message("Native whisper model loaded and cached.") - } - .native_whisper_cache[[cache_key]] + message("Native whisper model loaded and cached.") + } + .native_whisper_cache[[cache_key]] } #' Clear native whisper model cache @@ -41,15 +38,15 @@ #' #' @export clear_native_whisper_cache <- function() { - models <- ls(.native_whisper_cache) - if (length(models) > 0) { - rm(list = models, envir = .native_whisper_cache) - gc() - message("Cleared ", length(models), " cached native whisper model(s).") - } else { - message("Native whisper cache is empty.") - } - invisible(NULL) + models <- ls(.native_whisper_cache) + if (length(models) > 0) { + rm(list = models, envir = .native_whisper_cache) + gc() + message("Cleared ", length(models), " cached native whisper model(s).") + } else { + message("Native whisper cache is empty.") + } + invisible(NULL) } #' Internal: Transcribe via native whisper package @@ -62,75 +59,70 @@ clear_native_whisper_cache <- function() { #' @param language Character or NULL. Language code for transcription. #' @return List with transcription results in normalized format. #' @keywords internal -.via_whisper <- function( - file, - model = NULL, - language = NULL -) { - - if (!.has_whisper()) { - stop( - "whisper package is not installed.\n", - "Install with: remotes::install_github('cornball-ai/whisper')", - call. = FALSE - ) - } +.via_whisper <- function(file, model = NULL, language = NULL) { + if (!.has_whisper()) { + stop( + "whisper package is not installed.\n", + "Install with: remotes::install_github('cornball-ai/whisper')", + call. = FALSE + ) + } - # Default model if not specified - if (is.null(model)) { - model <- "medium" - } + # Default model if not specified + if (is.null(model)) { + model <- "medium" + } - # Default language - if (is.null(language)) { - language <- "en" - } + # Default language + if (is.null(language)) { + language <- "en" + } - # Run transcription using whisper::transcribe directly - # (it handles model loading/caching internally) - result <- tryCatch( - whisper::transcribe( - file = file, - model = model, - language = language, - word_timestamps = TRUE, - verbose = FALSE - ), - error = function(e) { - stop( - "Transcription failed: ", conditionMessage(e), - call. = FALSE - ) + # Run transcription using whisper::transcribe directly + # (it handles model loading/caching internally) + result <- tryCatch( + whisper::transcribe( + file = file, + model = model, + language = language, + word_timestamps = TRUE, + verbose = FALSE + ), + error = function(e) { + stop("Transcription failed: ", conditionMessage(e), call. = FALSE) } - ) + ) - # Build segments data frame if available - segments <- NULL - if (!is.null(result$segments) && nrow(result$segments) > 0) { - segments <- result$segments - # Normalize column names (whisper returns start/end already) - segments <- .normalize_segments(segments) - } + # Build segments data frame if available + segments <- NULL + if (!is.null(result$segments) && nrow(result$segments) > 0) { + segments <- result$segments + # Normalize column names (whisper returns start/end already) + segments <- .normalize_segments(segments) + } - out <- list( - text = result$text, - segments = segments, - language = result$language %||% language, - backend = "whisper", - raw = result - ) + out <- list( + text = result$text, + segments = segments, + language = result$language %||% language, + backend = "whisper", + raw = result + ) - # Pass through word-level timestamps if available - if (!is.null(result$words) && nrow(result$words) > 0) { - out$words <- result$words - } + # Pass through word-level timestamps if available + if (!is.null(result$words) && nrow(result$words) > 0) { + out$words <- result$words + } - out + out } # Null coalescing operator if not available -`%||%` <- function( - x, - y -) if (is.null(x)) y else x +`%||%` <- function(x, y) + +if (is.null(x)) { + y +} else { + x +} diff --git a/R/set_stt_base.R b/R/set_stt_base.R index 7dbf436..fea4240 100644 --- a/R/set_stt_base.R +++ b/R/set_stt_base.R @@ -15,20 +15,20 @@ #' #' @export set_stt_base <- function(url) { - if (!is.null(url) && !is.character(url)) { - stop("url must be a character string or NULL", call. = FALSE) - } - if (!is.null(url) && length(url) != 1) { - stop("url must be a single string", call. = FALSE) - } + if (!is.null(url) && !is.character(url)) { + stop("url must be a character string or NULL", call. = FALSE) + } + if (!is.null(url) && length(url) != 1) { + stop("url must be a single string", call. = FALSE) + } - # Remove trailing slash if present - if (!is.null(url)) { - url <- sub("/$", "", url) - } + # Remove trailing slash if present + if (!is.null(url)) { + url <- sub("/$", "", url) + } - old <- getOption("stt.api_base") - options(stt.api_base = url) - invisible(old) + old <- getOption("stt.api_base") + options(stt.api_base = url) + invisible(old) } diff --git a/R/set_stt_key.R b/R/set_stt_key.R index c84e436..108669c 100644 --- a/R/set_stt_key.R +++ b/R/set_stt_key.R @@ -14,15 +14,15 @@ #' #' @export set_stt_key <- function(key) { - if (!is.null(key) && !is.character(key)) { - stop("key must be a character string or NULL", call. = FALSE) - } - if (!is.null(key) && length(key) != 1) { - stop("key must be a single string", call. = FALSE) - } + if (!is.null(key) && !is.character(key)) { + stop("key must be a character string or NULL", call. = FALSE) + } + if (!is.null(key) && length(key) != 1) { + stop("key must be a single string", call. = FALSE) + } - old <- getOption("stt.api_key") - options(stt.api_key = key) - invisible(old) + old <- getOption("stt.api_key") + options(stt.api_key = key) + invisible(old) } diff --git a/R/stt.R b/R/stt.R index 90e91bf..c4928f5 100644 --- a/R/stt.R +++ b/R/stt.R @@ -42,41 +42,31 @@ #' } #' #' @export -stt <- function( - file, - model = NULL, - language = NULL, - response_format = c("json", "text", "verbose_json"), - backend = c("auto", "whisper", "openai"), - prompt = NULL -) { +stt <- function(file, model = NULL, language = NULL, + response_format = c("json", "text", "verbose_json"), + backend = c("auto", "whisper", "openai"), prompt = NULL) { + # Validate file + if (!file.exists(file)) { + stop("File not found: ", file, call. = FALSE) + } - # Validate file - if (!file.exists(file)) { - stop("File not found: ", file, call. = FALSE) - } + response_format <- match.arg(response_format) + backend <- match.arg(backend) - response_format <- match.arg(response_format) - backend <- match.arg(backend) + # Resolve backend + resolved_backend <- .choose_backend(backend) - # Resolve backend - resolved_backend <- .choose_backend(backend) - - # Dispatch to appropriate backend - if (resolved_backend == "openai") { - .via_api( - file = file, - model = model, - language = language, - response_format = response_format, - prompt = prompt - ) - } else { - .via_whisper( - file = file, - model = model, - language = language - ) - } + # Dispatch to appropriate backend + if (resolved_backend == "openai") { + .via_api( + file = file, + model = model, + language = language, + response_format = response_format, + prompt = prompt + ) + } else { + .via_whisper(file = file, model = model, language = language) + } } diff --git a/R/stt_health.R b/R/stt_health.R index ef9b9b3..6941a9d 100644 --- a/R/stt_health.R +++ b/R/stt_health.R @@ -20,72 +20,70 @@ #' #' @export stt_health <- function() { + # Check whisper package first + if (.has_whisper()) { + return(list( + ok = TRUE, + backend = "whisper", + message = "whisper package is available" + )) + } - # Check whisper package first - if (.has_whisper()) { - return(list( - ok = TRUE, - backend = "whisper", - message = "whisper package is available" - )) - } - - # Check API backend - api_base <- .get_api_base() - if (!is.null(api_base)) { - return(.check_api_health(api_base)) - } + # Check API backend + api_base <- .get_api_base() + if (!is.null(api_base)) { + return(.check_api_health(api_base)) + } - # No backend available - list( - ok = FALSE, - backend = NULL, - message = "No backend available. Install whisper or set stt.api_base." - ) + # No backend available + list( + ok = FALSE, + backend = NULL, + message = "No backend available. Install whisper or set stt.api_base." + ) } # Internal: Check API endpoint health .check_api_health <- function(base_url) { + # Try common health endpoints (including /v1/models which OpenAI supports) + endpoints <- c("/v1/models", "/health", "/v1/health", "/") + api_key <- .get_api_key() - # Try common health endpoints (including /v1/models which OpenAI supports) - endpoints <- c("/v1/models", "/health", "/v1/health", "/") - api_key <- .get_api_key() - - # Build headers (curl expects "Name: Value" format) - headers <- "Accept: application/json" - if (!is.null(api_key) && nchar(api_key) > 0) { - headers <- c(headers, paste0("Authorization: Bearer ", api_key)) - } + # Build headers (curl expects "Name: Value" format) + headers <- "Accept: application/json" + if (!is.null(api_key) && nchar(api_key) > 0) { + headers <- c(headers, paste0("Authorization: Bearer ", api_key)) + } - for (endpoint in endpoints) { - url <- paste0(base_url, endpoint) + for (endpoint in endpoints) { + url <- paste0(base_url, endpoint) - h <- curl::new_handle() - curl::handle_setopt(h, - timeout = 5, - httpheader = headers, - nobody = FALSE - ) + h <- curl::new_handle() + curl::handle_setopt(h, + timeout = 5, + httpheader = headers, + nobody = FALSE + ) - response <- tryCatch( - curl::curl_fetch_memory(url, handle = h), - error = function(e) NULL - ) + response <- tryCatch( + curl::curl_fetch_memory(url, handle = h), + error = function(e) NULL + ) - if (!is.null(response) && response$status_code < 400) { - return(list( - ok = TRUE, - backend = "api", - message = paste0("API endpoint responding at ", base_url) - )) + if (!is.null(response) && response$status_code < 400) { + return(list( + ok = TRUE, + backend = "api", + message = paste0("API endpoint responding at ", base_url) + )) + } } - } - # API not responding - list( - ok = FALSE, - backend = "api", - message = paste0("API endpoint not responding at ", base_url) - ) + # API not responding + list( + ok = FALSE, + backend = "api", + message = paste0("API endpoint not responding at ", base_url) + ) } diff --git a/R/zzz.R b/R/zzz.R index e68c8fe..948fd70 100644 --- a/R/zzz.R +++ b/R/zzz.R @@ -1,21 +1,17 @@ -.onLoad <- function( - libname, - pkgname -) { +.onLoad <- function(libname, pkgname) { + op <- options() + op_stt <- list( + stt.api_base = NULL, + stt.api_key = NULL, + stt.timeout = 60, + stt.backend = "auto" + ) - op <- options() - op_stt <- list( - stt.api_base = NULL, - stt.api_key = NULL, - stt.timeout = 60, - stt.backend = "auto" - ) + toset <- !(names(op_stt) %in% names(op)) + if (any(toset)) { + options(op_stt[toset]) + } - toset <- !(names(op_stt) %in% names(op)) - if (any(toset)) { - options(op_stt[toset]) - } - - invisible() + invisible() }