diff --git a/DESCRIPTION b/DESCRIPTION index 19f8ce1..62e40ec 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,5 +1,5 @@ Package: finna -Title: Title: R Package for Finna API +Title: Access the 'Finna' API Version: 0.1.1 Date: 2025-01-10 Authors@R: c( @@ -9,7 +9,7 @@ Authors@R: c( comment = c(ORCID = "0000-0001-5537-637X")) ) Maintainer: Akewak Jeba -Description: Provides functions to access and retrieve metadata from the Finna API, which aggregates content from Finnish archives, libraries, and museums. +Description: Provides functions to access and retrieve metadata from the 'Finna' API, which aggregates content from Finnish archives, libraries, and museums. License: BSD_2_clause + file LICENSE Encoding: UTF-8 Imports: @@ -22,14 +22,8 @@ Imports: readr, tibble, curl, - rlang, - stringr, - tidyr, progress, - purrr, - tm, - stopwords, - wordcloud2 + purrr Suggests: testthat (>= 3.0.0), rmarkdown, diff --git a/NAMESPACE b/NAMESPACE index d01b8a8..163111e 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -15,9 +15,7 @@ export(load_offline_data) export(refine_metadata) export(save_for_offline) export(search_finna) -export(search_finna_from_file) export(search_publisher) -export(timeline_relative) export(top_plot) import(dplyr) import(progress) @@ -32,10 +30,16 @@ importFrom(dplyr,mutate) importFrom(dplyr,select) importFrom(dplyr,summarise) importFrom(ggplot2,aes) +importFrom(ggplot2,coord_flip) +importFrom(ggplot2,geom_bar) importFrom(ggplot2,geom_line) +importFrom(ggplot2,geom_text) importFrom(ggplot2,ggplot) importFrom(ggplot2,labs) +importFrom(ggplot2,scale_y_log10) importFrom(ggplot2,theme_minimal) +importFrom(ggplot2,xlab) +importFrom(ggplot2,ylab) importFrom(glue,glue) importFrom(httr,GET) importFrom(httr,content) diff --git a/R/enrich_author_name.R b/R/enrich_author_name.R index 405f333..db1c52b 100644 --- a/R/enrich_author_name.R +++ b/R/enrich_author_name.R @@ -1,13 +1,13 @@ -#' @title Enrich Author Name from Finna API and Save Results +#' @title Enrich Author 
Name from 'Finna' API and Save Results #' #' @description #' This function reads a CSV file from a URL containing Melinda IDs and author names. -#' If the author name is missing (NA), it searches Finna for the corresponding Melinda ID +#' If the author name is missing (NA), it searches the 'Finna' API for the corresponding Melinda ID #' to retrieve and update the author name. The updated data is saved in a CSV file. #' #' @param url A character string specifying the URL of the CSV file with Melinda IDs and author names. #' @param output_file A character string specifying the output CSV file name. -#' @return A tibble of the updated data, and the file is saved to the `data` directory. +#' @return A tibble with updated author names. The file is saved to a temporary directory using \code{tempdir()}. #' @importFrom readr read_csv write_csv cols col_character #' @importFrom dplyr mutate if_else #' @importFrom purrr map_chr @@ -15,7 +15,7 @@ #' @examples #' \dontrun{ #' enrich_author_name(url = "https://example/na_author_rows.csv", -#' output_file = "updated_na_author_rows.csv") +#' output_file = "updated_na_author_rows.csv") #' } enrich_author_name <- function(url, output_file = "updated_na_author_rows.csv") { @@ -38,10 +38,6 @@ enrich_author_name <- function(url, output_file = "updated_na_author_rows.csv") if (!is.null(results) && nrow(results) > 0) { authors <- results$Author[1] return(authors) - } else if (httr::status_code(response) == 429) { - message("Rate limit reached. 
Waiting for ", wait_time, " seconds before retrying.") - Sys.sleep(wait_time) - attempt <- attempt + 1 } else { return(NA) } @@ -55,7 +51,7 @@ enrich_author_name <- function(url, output_file = "updated_na_author_rows.csv") author_name = readr::col_character() )) - # Step 2: Update `author_name` if it is 'NA' by fetching from Finna + # Step 2: Update `author_name` if it is 'NA' by fetching from 'Finna' data <- data %>% dplyr::mutate( updated_author_name = dplyr::if_else( @@ -70,17 +66,10 @@ enrich_author_name <- function(url, output_file = "updated_na_author_rows.csv") ) ) - # Ensure the 'data' directory exists - if (!dir.exists("data")) { - dir.create("data") - } - - # Define output file path - output_csv_path <- file.path("data", output_file) - - # Step 3: Save the updated data as CSV + # Step 3: Save the updated data as CSV in a temporary directory + output_csv_path <- file.path(tempdir(), output_file) readr::write_csv(data, output_csv_path) message("CSV file with updated author names saved to ", output_csv_path) - return(data) # Return the updated tibble as well + return(data) # Return the updated tibble } diff --git a/R/finna_cite.R b/R/finna_cite.R index f0a370b..1e67b31 100644 --- a/R/finna_cite.R +++ b/R/finna_cite.R @@ -6,6 +6,7 @@ #' @param result The Finna collection result as a tibble. #' @param index The index of the collection to cite (numeric). #' @param style The citation style to use (default: "citation"). See \code{\link[utils]{bibentry}}. +#' @return A bibliographic entry (\code{bibentry}) printed in the specified style. #' @export finna_cite <- function(result, index, style = "citation") { # Validate the input structure diff --git a/R/refine_metadata.R b/R/refine_metadata.R index 470f786..6ce9696 100644 --- a/R/refine_metadata.R +++ b/R/refine_metadata.R @@ -1,10 +1,18 @@ #' @title Refine Finna Metadata #' #' @description -#' Refines the Finna metadata tibble by keeping relevant fields and cleaning up missing values. 
-#' The purpose of the refine_metadata function is to: Ensure completeness by filling in missing -#' values with placeholder text, Standardize key metadata fields for easier analysis, Select only -#' the most relevant fields, simplifying the dataset. +#' The `refine_metadata` function cleans and standardizes Finna metadata by: +#' - **Validating Required Fields:** Checks for the presence of key metadata fields and returns `NULL` if any are missing. +#' - **Handling Missing Values:** Replaces `NA` values in critical fields with descriptive placeholder text (e.g., "Unknown Title"). +#' - **Selecting Relevant Fields:** Keeps only the following fields for streamlined analysis: +#' - `Title`: The title of the resource. +#' - `Author`: The creator or author of the resource. +#' - `Year`: The publication or release year. +#' - `Language`: The language of the resource. +#' - `Formats`: The format(s) of the resource (e.g., Book, Audio). +#' - `Subjects`: The subject keywords or classifications. +#' - `Library`: The owning library or institution. +#' - `Series`: The series or collection the resource belongs to. #' #' @param data A tibble containing raw Finna metadata. #' @return A tibble with selected, cleaned metadata fields, or NULL if required fields are missing. diff --git a/R/save_for_offline.R b/R/save_for_offline.R index 44c4af8..a27c1fd 100644 --- a/R/save_for_offline.R +++ b/R/save_for_offline.R @@ -1,13 +1,13 @@ -#' @title Save Finna Search Results for Offline Access +#' @title Save 'Finna' Search Results for Offline Access #' #' @description -#' This function saves Finna search results and metadata locally to a file in `.rds` format, +#' This function saves 'Finna' search results and metadata locally to a file in `.rds` format, #' allowing users to access and analyze the data offline without an internet connection. #' -#' @param data A tibble or data frame containing the Finna search results. 
+#' @param data A tibble or data frame containing the 'Finna' search results. #' @param file_name A string representing the name of the file to save. #' The function automatically appends ".rds" to the name if not already included. -#' @return A message confirming that the data has been saved successfully. +#' @return No return value. Called for its side effects of saving the data to a file. #' @examples #' \dontrun{ #' search_results <- search_finna("sibelius") @@ -15,16 +15,13 @@ #' } #' @export save_for_offline <- function(data, file_name = "offline_search_results") { - # Ensure the data directory exists - dir.create("data", showWarnings = FALSE) - # Ensure the file name has the .rds extension if (!grepl("\\.rds$", file_name)) { file_name <- paste0(file_name, ".rds") } - # Define the full file path - full_path <- file.path("data", file_name) + # Define the full file path in a temporary directory + full_path <- file.path(tempdir(), file_name) if (!is.null(data) && nrow(data) > 0) { saveRDS(data, full_path) @@ -34,28 +31,30 @@ save_for_offline <- function(data, file_name = "offline_search_results") { } } -#' Load Finna Search Results from Offline File +#' @title Load 'Finna' Search Results from Offline File #' #' @description -#' This function loads previously saved Finna search results from a local `.rds` file for offline access. +#' This function loads previously saved 'Finna' search results from a local `.rds` file for offline access. #' #' @param file_name A string representing the name of the file to load. #' The function automatically appends ".rds" if not already included. -#' @return The loaded search results in tibble format. -#' @export +#' @return A tibble or data frame containing the loaded search results. 
#' @examples #' \dontrun{ +#' search_results <- search_finna("sibelius") +#' save_for_offline(search_results, "sibelius_search_results") #' offline_data <- load_offline_data("sibelius_search_results") #' print(offline_data) #' } +#' @export load_offline_data <- function(file_name = "offline_search_results") { # Ensure the file name has the .rds extension if (!grepl("\\.rds$", file_name)) { file_name <- paste0(file_name, ".rds") } - # Define the full file path - full_path <- file.path("data", file_name) + # Define the full file path in the temporary directory + full_path <- file.path(tempdir(), file_name) if (file.exists(full_path)) { data <- readRDS(full_path) @@ -65,11 +64,3 @@ load_offline_data <- function(file_name = "offline_search_results") { stop("File not found. Please ensure the file exists and try again.") } } - -# Example Usage: -# search_results <- search_finna("sibelius") -# save_for_offline(search_results, "sibelius_search_results") # Saves as "data/sibelius_search_results.rds" - -# Load the search results back later -# offline_results <- load_offline_data("sibelius_search_results") -# print(offline_results) diff --git a/R/search_finna_from_file.R b/R/search_finna_from_file.R deleted file mode 100644 index 00a08d1..0000000 --- a/R/search_finna_from_file.R +++ /dev/null @@ -1,67 +0,0 @@ -#' @title Search Finna using text from a file (extended version) -#' -#' @description -#' Reads text from a file, processes it, and searches it in Finna using the `search_finna` function. -#' -#' @param file_path The path to the text file. -#' @param limit The number of results to return from Finna. Defaults to 10. -#' @param lng Language for returned translated strings. Defaults to "fi". -#' @param query_limit The maximum length of the query string allowed for the API. Defaults to 500 characters. -#' @return A tibble containing the Finna search results. 
-#' @export -search_finna_from_file <- function(file_path, limit = 10, lng = "fi", query_limit = 500) { - # Read the content of the file - file_content <- tryCatch({ - readLines(file_path, warn = FALSE) - }, error = function(e) { - stop("Failed to read the file: ", e$message, call. = FALSE) - }, warning = function(w) { - stop("Failed to read the file: ", w$message, call. = FALSE) - }) - - # Concatenate the lines into a single string (in case it's multi-line) - full_text <- paste(file_content, collapse = " ") - - # Preprocess the text (e.g., remove non-alphanumeric characters, excessive whitespace) - clean_text <- gsub("[^[:alnum:][:space:]]", "", full_text) - clean_text <- gsub("\\s+", " ", clean_text) - - # Split the text into chunks that fit within the query limit - text_chunks <- strsplit(clean_text, "(?<=.{500})\\s", perl = TRUE)[[1]] - - # Initialize an empty tibble to store results - all_results <- tibble::tibble() - - # Loop through the chunks and perform a search for each - for (chunk in text_chunks) { - query <- substr(chunk, 1, query_limit) # Truncate to query limit if necessary - cat("Performing search with query:", query, "\n") # For logging - - # Perform a search on Finna using the content from the chunk - results <- tryCatch({ - search_finna(query, limit = limit, lng = lng) - }, error = function(e) { - warning("Search failed for query: ", query, ". Error: ", e$message, call. = FALSE) - return(tibble::tibble()) - }) - - # Append valid results to all_results - if (nrow(results) > 0) { - all_results <- dplyr::bind_rows(all_results, results) - } - } - - # Return all combined results or throw an error if no results found - if (nrow(all_results) == 0) { - stop("No results found for the given file content.", call. 
= FALSE) - } else { - return(all_results) - } -} - - -# Example usage: -# Assuming the file contains the search query -#file_path <- "/Users/akasia/textfile.txt" -#finna_results <- search_finna_from_file(file_path, limit = 20, lng = "en-gb") -#print(finna_results) diff --git a/R/timeline_relative.R b/R/timeline_relative.R deleted file mode 100644 index b6094a6..0000000 --- a/R/timeline_relative.R +++ /dev/null @@ -1,39 +0,0 @@ -#' @title Retrieve Relative Timeline -#' @description Relative timeline for selected variable between two data sets. -#' @param x First data frame -#' @param y Second data frame -#' @param myfield Numeric field to summarize in the timeline (x/y). The number of entries (title count) per decade is used by default. If this argument is used, the sum of entries per decade for this field is given. -#' @param time.window Time window for the timeline in years. Default: 10 (publication decade). -#' @return data.frame -#' @export -#' @author Leo Lahti \email{leo.lahti@@iki.fi} -#' @references See citation("bibliographica") -#' @examples \dontrun{timeline_relative(df, "gatherings")} -#' @keywords utilities -timeline_relative <- function (x, y, myfield, time.window = 10) { - - publication_time <- X <- Y <- absolute <- relative <- group <- NULL - nmin <- 0; mode <- "absolute" - - # Stats in the first data set - tab0 <- timeline(x, field = myfield, nmin = nmin, mode = mode, time.window = time.window) - tab0$group <- rep("X", nrow(tab0)) - - # Stats in the second data set - tab <- timeline(y, field = myfield, nmin = nmin, mode = mode, time.window = time.window) - tab$group <- rep("Y", nrow(tab)) - - # Limit the analysis on the same time window - tab <- subset(tab, publication_time >= min(tab0$publication_time) & - publication_time <= max(tab0$publication_time)) - - df <- bind_rows(tab, tab0) - df$group <- factor(df$group) - df <- df %>% select(publication_time, group, absolute) %>% - spread(key = "group", value = "absolute", fill = 0) - - df <- df %>% 
mutate(fraction = 100 * X/Y) - - df - -} diff --git a/R/top_plot.R b/R/top_plot.R index b464ce5..a8c6f8f 100644 --- a/R/top_plot.R +++ b/R/top_plot.R @@ -10,6 +10,7 @@ #' @param show.percentage Show the proportion of each category with respect to the total sample count. #' @param log10 Show the counts on log10 scale (default FALSE) #' @return ggplot object +#' @importFrom ggplot2 geom_bar coord_flip xlab ylab geom_text scale_y_log10 #' @export #' @author Leo Lahti \email{leo.lahti@@iki.fi} #' @references See citation("bibliographica") diff --git a/man/.Rapp.history b/man/.Rapp.history new file mode 100644 index 0000000..e69de29 diff --git a/man/enrich_author_name.Rd b/man/enrich_author_name.Rd index f014fb8..29d7238 100644 --- a/man/enrich_author_name.Rd +++ b/man/enrich_author_name.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/enrich_author_name.R \name{enrich_author_name} \alias{enrich_author_name} -\title{Enrich Author Name from Finna API and Save Results} +\title{Enrich Author Name from 'Finna' API and Save Results} \usage{ enrich_author_name(url, output_file = "updated_na_author_rows.csv") } @@ -12,16 +12,16 @@ enrich_author_name(url, output_file = "updated_na_author_rows.csv") \item{output_file}{A character string specifying the output CSV file name.} } \value{ -A tibble of the updated data, and the file is saved to the \code{data} directory. +A tibble with updated author names. The file is saved to a temporary directory using \code{tempdir()}. } \description{ This function reads a CSV file from a URL containing Melinda IDs and author names. -If the author name is missing (NA), it searches Finna for the corresponding Melinda ID +If the author name is missing (NA), it searches the 'Finna' API for the corresponding Melinda ID to retrieve and update the author name. The updated data is saved in a CSV file. 
} \examples{ \dontrun{ enrich_author_name(url = "https://example/na_author_rows.csv", -output_file = "updated_na_author_rows.csv") + output_file = "updated_na_author_rows.csv") } } diff --git a/man/finna_cite.Rd b/man/finna_cite.Rd index 2be9afe..f781d6d 100644 --- a/man/finna_cite.Rd +++ b/man/finna_cite.Rd @@ -13,6 +13,9 @@ finna_cite(result, index, style = "citation") \item{style}{The citation style to use (default: "citation"). See \code{\link[utils]{bibentry}}.} } +\value{ +A bibliographic entry (\code{bibentry}) printed in the specified style. +} \description{ Automatically generates a citation for a Finna collection result. } diff --git a/man/load_offline_data.Rd b/man/load_offline_data.Rd index f4cc5c2..f5bfe4c 100644 --- a/man/load_offline_data.Rd +++ b/man/load_offline_data.Rd @@ -2,7 +2,7 @@ % Please edit documentation in R/save_for_offline.R \name{load_offline_data} \alias{load_offline_data} -\title{Load Finna Search Results from Offline File} +\title{Load 'Finna' Search Results from Offline File} \usage{ load_offline_data(file_name = "offline_search_results") } @@ -11,13 +11,15 @@ load_offline_data(file_name = "offline_search_results") The function automatically appends ".rds" if not already included.} } \value{ -The loaded search results in tibble format. +A tibble or data frame containing the loaded search results. } \description{ -This function loads previously saved Finna search results from a local \code{.rds} file for offline access. +This function loads previously saved 'Finna' search results from a local \code{.rds} file for offline access. 
} \examples{ \dontrun{ +search_results <- search_finna("sibelius") +save_for_offline(search_results, "sibelius_search_results") offline_data <- load_offline_data("sibelius_search_results") print(offline_data) } diff --git a/man/refine_metadata.Rd b/man/refine_metadata.Rd index f998e25..4e1f02d 100644 --- a/man/refine_metadata.Rd +++ b/man/refine_metadata.Rd @@ -13,10 +13,22 @@ refine_metadata(data) A tibble with selected, cleaned metadata fields, or NULL if required fields are missing. } \description{ -Refines the Finna metadata tibble by keeping relevant fields and cleaning up missing values. -The purpose of the refine_metadata function is to: Ensure completeness by filling in missing -values with placeholder text, Standardize key metadata fields for easier analysis, Select only -the most relevant fields, simplifying the dataset. +The \code{refine_metadata} function cleans and standardizes Finna metadata by: +\itemize{ +\item \strong{Validating Required Fields:} Checks for the presence of key metadata fields and returns \code{NULL} if any are missing. +\item \strong{Handling Missing Values:} Replaces \code{NA} values in critical fields with descriptive placeholder text (e.g., "Unknown Title"). +\item \strong{Selecting Relevant Fields:} Keeps only the following fields for streamlined analysis: +\itemize{ +\item \code{Title}: The title of the resource. +\item \code{Author}: The creator or author of the resource. +\item \code{Year}: The publication or release year. +\item \code{Language}: The language of the resource. +\item \code{Formats}: The format(s) of the resource (e.g., Book, Audio). +\item \code{Subjects}: The subject keywords or classifications. +\item \code{Library}: The owning library or institution. +\item \code{Series}: The series or collection the resource belongs to. 
+} +} } \examples{ library(finna) diff --git a/man/save_for_offline.Rd b/man/save_for_offline.Rd index 5dc8761..e70dc7b 100644 --- a/man/save_for_offline.Rd +++ b/man/save_for_offline.Rd @@ -2,21 +2,21 @@ % Please edit documentation in R/save_for_offline.R \name{save_for_offline} \alias{save_for_offline} -\title{Save Finna Search Results for Offline Access} +\title{Save 'Finna' Search Results for Offline Access} \usage{ save_for_offline(data, file_name = "offline_search_results") } \arguments{ -\item{data}{A tibble or data frame containing the Finna search results.} +\item{data}{A tibble or data frame containing the 'Finna' search results.} \item{file_name}{A string representing the name of the file to save. The function automatically appends ".rds" to the name if not already included.} } \value{ -A message confirming that the data has been saved successfully. +No return value. Called for its side effects of saving the data to a file. } \description{ -This function saves Finna search results and metadata locally to a file in \code{.rds} format, +This function saves 'Finna' search results and metadata locally to a file in \code{.rds} format, allowing users to access and analyze the data offline without an internet connection. } \examples{ diff --git a/man/search_finna_from_file.Rd b/man/search_finna_from_file.Rd deleted file mode 100644 index 6c57565..0000000 --- a/man/search_finna_from_file.Rd +++ /dev/null @@ -1,23 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/search_finna_from_file.R -\name{search_finna_from_file} -\alias{search_finna_from_file} -\title{Search Finna using text from a file (extended version)} -\usage{ -search_finna_from_file(file_path, limit = 10, lng = "fi", query_limit = 500) -} -\arguments{ -\item{file_path}{The path to the text file.} - -\item{limit}{The number of results to return from Finna. Defaults to 10.} - -\item{lng}{Language for returned translated strings. 
Defaults to "fi".} - -\item{query_limit}{The maximum length of the query string allowed for the API. Defaults to 500 characters.} -} -\value{ -A tibble containing the Finna search results. -} -\description{ -Reads text from a file, processes it, and searches it in Finna using the \code{search_finna} function. -} diff --git a/man/timeline_relative.Rd b/man/timeline_relative.Rd deleted file mode 100644 index b653c58..0000000 --- a/man/timeline_relative.Rd +++ /dev/null @@ -1,33 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/timeline_relative.R -\name{timeline_relative} -\alias{timeline_relative} -\title{Retrieve Relative Timeline} -\usage{ -timeline_relative(x, y, myfield, time.window = 10) -} -\arguments{ -\item{x}{First data frame} - -\item{y}{Second data frame} - -\item{myfield}{Numeric field to summarize in the timeline (x/y). The number of entries (title count) per decade is used by default. If this argument is used, the sum of entries per decade for this field is given.} - -\item{time.window}{Time window for the timeline in years. Default: 10 (publication decade).} -} -\value{ -data.frame -} -\description{ -Relative timeline for selected variable between two data sets. 
-} -\examples{ -\dontrun{timeline_relative(df, "gatherings")} -} -\references{ -See citation("bibliographica") -} -\author{ -Leo Lahti \email{leo.lahti@iki.fi} -} -\keyword{utilities} diff --git a/tests/testthat/test-fetch_finna_collection.R b/tests/testthat/test-fetch_finna.R similarity index 100% rename from tests/testthat/test-fetch_finna_collection.R rename to tests/testthat/test-fetch_finna.R diff --git a/tests/testthat/test-save_load_offline.R b/tests/testthat/test-save_load_offline.R deleted file mode 100644 index 6a69ae2..0000000 --- a/tests/testthat/test-save_load_offline.R +++ /dev/null @@ -1,110 +0,0 @@ -test_that("save_for_offline saves valid data to a file", { - # Create a temporary data frame - test_data <- tibble::tibble( - id = 1:5, - Title = paste("Title", 1:5), - Author = paste("Author", 1:5) - ) - - # Specify a temporary file name - temp_file <- "test_search_results" - - # Run the function - expect_message( - save_for_offline(test_data, temp_file), - regexp = "Search results saved successfully to data/test_search_results.rds" - ) - - # Check if the file exists - expect_true(file.exists(file.path("data", paste0(temp_file, ".rds")))) - - # Clean up - unlink("data", recursive = TRUE) -}) - -test_that("save_for_offline adds .rds extension if missing", { - test_data <- tibble::tibble( - id = 1:3, - Title = c("A", "B", "C"), - Author = c("X", "Y", "Z") - ) - - temp_file <- "test_results" - - # Save the file without ".rds" in the name - save_for_offline(test_data, temp_file) - - # Check if the file exists with the correct extension - expect_true(file.exists(file.path("data", paste0(temp_file, ".rds")))) - - # Clean up - unlink("data", recursive = TRUE) -}) - -test_that("save_for_offline throws error for empty or NULL data", { - # Create an empty tibble - empty_data <- tibble::tibble() - - # Expect an error for empty data - expect_error( - save_for_offline(empty_data, "empty_data_test"), - regexp = "No data to save. 
Ensure that the search results are valid." - ) - - # Expect an error for NULL data - expect_error( - save_for_offline(NULL, "null_data_test"), - regexp = "No data to save. Ensure that the search results are valid." - ) -}) - -test_that("load_offline_data loads valid data from a file", { - # Create a temporary data frame - test_data <- tibble::tibble( - id = 1:3, - Title = c("A", "B", "C"), - Author = c("X", "Y", "Z") - ) - - # Save the test data - temp_file <- "test_load_results" - save_for_offline(test_data, temp_file) - - # Load the saved data - loaded_data <- load_offline_data(temp_file) - - # Check if the data matches - expect_equal(loaded_data, test_data) - - # Clean up - unlink("data", recursive = TRUE) -}) - -test_that("load_offline_data adds .rds extension if missing", { - # Create a temporary data frame - test_data <- tibble::tibble( - id = 1:4, - Title = c("D", "E", "F", "G"), - Author = c("P", "Q", "R", "S") - ) - - # Save the test data - temp_file <- "test_extension" - save_for_offline(test_data, temp_file) - - # Load the data without providing .rds - loaded_data <- load_offline_data("test_extension") - - # Check if the data matches - expect_equal(loaded_data, test_data) - - # Clean up - unlink("data", recursive = TRUE) -}) - -test_that("load_offline_data throws error for non-existent file", { - expect_error( - load_offline_data("non_existent_file"), - regexp = "File not found. Please ensure the file exists and try again." 
- ) -}) diff --git a/tests/testthat/test-search_finna_from_file.R b/tests/testthat/test-search_finna_from_file.R deleted file mode 100644 index caac59c..0000000 --- a/tests/testthat/test-search_finna_from_file.R +++ /dev/null @@ -1,64 +0,0 @@ -test_that("search_finna_from_file processes file content and performs search correctly", { - # Create a temporary file with test content - temp_file <- tempfile() - on.exit(unlink(temp_file)) - writeLines(c("This is a test query", "with multiple lines"), temp_file) - - # Perform the search - results <- suppressWarnings(search_finna_from_file(temp_file, limit = 5, lng = "en", query_limit = 50)) - - expect_true(inherits(results, "tbl_df"), "The result should be a tibble.") - expect_gt(nrow(results), 0, "The number of rows should be greater than 0.") - expect_true("Title" %in% names(results), "The result should contain a 'Title' column.") - expect_true("Author" %in% names(results), "The result should contain an 'Author' column.") -}) - -test_that("search_finna_from_file handles empty file gracefully", { - # Create a temporary empty file - temp_file <- tempfile() - on.exit(unlink(temp_file)) - writeLines(character(0), temp_file) - - expect_error( - search_finna_from_file(temp_file), - regexp = "No results found for the given file content.", - info = "The function should return an error for an empty file." - ) -}) - -test_that("search_finna_from_file handles non-existent file", { - expect_error( - search_finna_from_file("non_existent_file.txt"), - regexp = "Failed to read the file: cannot open file", - info = "The function should return an error when the file does not exist." 
- ) -}) - - -test_that("search_finna_from_file respects query limit and splits long text", { - # Create a temporary file with long text - temp_file <- tempfile() - on.exit(unlink(temp_file)) - writeLines(paste(rep("A very long text query that exceeds the query limit.", 10), collapse = " "), temp_file) - - # Perform the search - results <- suppressWarnings(search_finna_from_file(temp_file, query_limit = 50)) - - expect_true(inherits(results, "tbl_df"), "The result should be a tibble.") - expect_gt(nrow(results), 0, "The number of rows should be greater than 0.") -}) - -# test_that("search_finna_from_file handles search_finna returning no results", { -# # Create a temporary file with test content -# temp_file <- tempfile() -# on.exit(unlink(temp_file)) -# writeLines("This is a test query with no results", temp_file) -# -# expect_error( -# suppressWarnings(search_finna_from_file(temp_file)), -# regexp = "No results found for the given file content.", -# info = "The function should return an error when no results are found." -# ) -# }) - - diff --git a/vignettes/articles/Fennica.Rmd b/vignettes/articles/Fennica.Rmd index 673a2f9..76b33a8 100644 --- a/vignettes/articles/Fennica.Rmd +++ b/vignettes/articles/Fennica.Rmd @@ -54,89 +54,19 @@ refined_data <- refine_metadata(fennica) ``` ```{r message = FALSE, warning = FALSE, fig.alt = "visualize word cloud"} +library(finna) +library(ggplot2) fennica <- search_finna("*",filters = c('collection:"FEN"', 'search_daterange_mv:"[1809 TO 1918]"')) refined_data <- refine_metadata(fennica) -``` - -To search for specific fields like author information, publication details, and call numbers -using `search_finna`, you can use Finna's **field-specific search filters**. Finna allows -you to search in different fields by specifying the `type` parameter or adding filters. -Here's how you can modify the `search_finna` function to query these fields: - -### 1. 
**Search for Author Information:** +top_plot(refined_data, field = "Year") + + xlab("Publication Year") + # Custom X-axis label + ylab("Number of Publications") # Custom Y-axis label - - You can use the `type = "Author"` option to specifically search for records by author. - -```{r message = FALSE, warning = FALSE} -library(finna) -record <-search_finna(query = "Jean Sibelius", type = "Author") -record ``` -Alternatively, you can apply filters to search for authors using the `filters` parameter: - -```{r message = FALSE, warning = FALSE} -record <- search_finna(query = "Jean Sibelius", filters = c('author:"Jean Sibelius"')) -record -``` - -### 2. **Search for Publication Information:** - - If you want to search for publication information such as the publication date or publisher, you can use `type = "Title"` or `type = "AllFields"` and then apply filters: - - - For specific years, you can use the `search_daterange_mv` filter: - - ```r - search_finna(query = "Sibelius", filters = c('search_daterange_mv:"[2000 TO 2020]"')) - ``` - - - To search by publisher, you can add a filter for the publisher name: - - ```r - search_finna(query = "Sibelius", filters = c('publisher:"Ondine"')) - ``` - -### 3. **Search by Call Numbers:** - - Call numbers are used to classify items in libraries. To search by call number, you can add a filter for `callnumber-search` or `callnumber`: - - ```r - search_finna(query = "Sibelius", filters = c('callnumber-search:"78.54"')) - ``` - - This will return results where the call number is `78.54` (which is typically used for orchestral music). - -### Example Using Multiple Filters: -You can combine these search types and filters to make more complex queries. 
For instance, to search for works by **Jean Sibelius** published between **2000 and 2020** with the call number **78.54**: - -```r -search_finna( - query = "Sibelius", - filters = c('author:"Jean Sibelius"', 'search_daterange_mv:"[2000 TO 2020]"', 'callnumber-search:"78.54"') -) -``` - -### Code Overview: - -```r -# Author search example -search_finna(query = "Jean Sibelius", type = "Author") - -# Search for works by author with publication date range -search_finna(query = "Jean Sibelius", filters = c('search_daterange_mv:"[2000 TO 2020]"')) - -# Search for works by call number -search_finna(query = "Sibelius", filters = c('callnumber-search:"78.54"')) - -# Combine author, publication date, and call number filters -search_finna( - query = "Sibelius", - filters = c('author:"Jean Sibelius"', 'search_daterange_mv:"[2000 TO 2020]"', 'callnumber-search:"78.54"') -) -``` ### Notes: - **Filters**: The filters need to match the exact field names used in Finna's API. You can find these field names in the API documentation or by looking at the response from the API [here](https://www.kiwi.fi/display/Finna/Kenttien+mappaukset+eri+formaateista+Finnan+indeksiin). -- **Call Number Search**: Ensure that the call numbers are correctly formatted according to the library's classification system (e.g., YKL in Finland). +- **Call Number Search**: Ensure that the call numbers are correctly formatted according to the library's classification system (e.g., [YKL in Finland](https://finto.fi/ykl/fi/?clang=en)). This way, you can extract specific metadata like authors, publication years, and call numbers using the `search_finna` function. 
diff --git a/vignettes/articles/finna_collections.Rmd b/vignettes/articles/finna_collections.Rmd index c537968..664893a 100644 --- a/vignettes/articles/finna_collections.Rmd +++ b/vignettes/articles/finna_collections.Rmd @@ -16,6 +16,26 @@ This document demonstrates the use of the `fetch_finna` function to retrieve dat ## Example 1: Fetching EAD Records +**EAD records** refer to archival descriptions encoded using the **Encoded Archival Description (EAD)** standard, an XML-based framework designed to describe archival materials, collections, and finding aids in a structured, machine-readable format. + +### **Purpose of EAD Records** +EAD records provide detailed information about archival collections, enabling better organization, discovery, and access to historical and cultural resources stored in archives, libraries, and museums. + +### **What EAD Records Contain** + +An EAD record typically includes metadata such as: + +- **Collection Title**: Name of the archival collection. +- **Creator(s)**: Person(s) or organization(s) responsible for the collection. +- **Dates**: Time period covered by the materials. +- **Extent**: Size or physical description (e.g., number of boxes or folders). +- **Scope and Content**: Overview of what the collection contains. +- **Biographical/Historical Note**: Background on the creator or related historical context. +- **Arrangement**: How the materials are organized. +- **Access and Use Conditions**: Restrictions or guidelines for accessing the materials. +- **Container List**: Detailed inventory of items or series in the collection. + + ```{r message = FALSE, warning = FALSE} library(finna) result <- fetch_finna( @@ -43,6 +63,8 @@ result <- fetch_finna( print(result) ``` +The other metadata formats available in Finna are listed in the [OAI-PMH harvesting interface documentation](https://www.kiwi.fi/display/Finna/OAI-PMH+Harvesting+Interface+for+Finna%27s+Index).
+ ## Conclusion diff --git a/vignettes/articles/refinemetadata.Rmd b/vignettes/articles/refinemetadata.Rmd index 23310f7..05c79be 100644 --- a/vignettes/articles/refinemetadata.Rmd +++ b/vignettes/articles/refinemetadata.Rmd @@ -1,8 +1,8 @@ --- -title: "Refine, integrate and analyse Finna metadata" +title: "Refine Finna metadata" output: rmarkdown::html_vignette vignette: > - %\VignetteIndexEntry{Refine, integrate and analyse Finna metadata} + %\VignetteIndexEntry{Refine Finna metadata} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- @@ -26,30 +26,6 @@ refined_data <- refine_metadata(sibelius_data) print(refined_data) ``` -### **integrate to other metadata ** -To integrate two datasets using full_join() from dplyr, you can write the code directly like this: - -```{r message = FALSE, warning = FALSE} -library(dplyr) - -# Example Finna metadata (metadata1) -finna_data <- search_finna("sibelius",limit = 4) - -# Example other dataset to merge with (metadata2) -other_data <- tibble::tibble( - Title = c("Sibelius Symphony No. 5", "Finlandia", "Valse Triste"), - Rating = c(5, 4, 3) -) - -# Integrate the two datasets using full_join by the "Title" column -integrated_data <- full_join(finna_data, other_data, by = "Title") - -# Print the integrated dataset -print(integrated_data) - -``` - - ### **Analyze using ` analyze_metadata()` Function** ```{r message = FALSE, warning = FALSE} @@ -66,7 +42,9 @@ sibelius_data <- search_finna("sibelius") refined_data <- refine_metadata(sibelius_data) analysis_results <- analyze_metadata(refined_data) #visualize_year_distribution(analysis_results$year_distribution) -top_plot(analysis_results$author_distribution, field = "n") +top_plot(analysis_results$author_distribution, field = "n") + + xlab("Frequency") + # Custom X-axis label + ylab("Number of Authors") # Custom Y-axis label ``` ### 1.
yearly distribution using top_plot() @@ -75,7 +53,10 @@ library(finna) sibelius_data <- search_finna("sibelius") refined_data <- refine_metadata(sibelius_data) #visualize_year_distribution_line(refined_data) -top_plot(refined_data, field = "Year") +top_plot(refined_data, field = "Year") + + xlab("Publication Year") + # Custom X-axis label + ylab("Number of Publications") # Custom Y-axis label + ``` diff --git a/vignettes/articles/viola_collections.Rmd b/vignettes/articles/viola_collections.Rmd index 5a18309..9e61f58 100644 --- a/vignettes/articles/viola_collections.Rmd +++ b/vignettes/articles/viola_collections.Rmd @@ -69,7 +69,9 @@ library(ggplot2) # Refine metadata and visualize author distribution refined_data <- refine_metadata(results) -top_plot(refined_data, field = "Year") +top_plot(refined_data, field = "Year") + + xlab("Frequency") + # Custom X-axis label + ylab("Year") # Custom Y-axis label ``` diff --git a/vignettes/vignette.Rmd b/vignettes/vignette.Rmd index dedc985..daf1350 100644 --- a/vignettes/vignette.Rmd +++ b/vignettes/vignette.Rmd @@ -172,7 +172,7 @@ record <- search_finna("era:'2010-luku'", filters = c('building:"0/3AMK/"')) ### search without removing duplication In order to search data without removing duplication -[example](https://www.finna.fi/Search/Results?dfApplied=1&lookfor=era%3A%222010-luku%22&type=AllFields&filter[]=~building%3A%220%2F3AMK%2F%22&filter[]=finna.deduplication:%220%22) +, set the `finna.deduplication` filter as shown in the example below. ```{r message = FALSE, warning = FALSE} record <- search_finna('era:"2010-luku"', filters = c('~building:"0/3AMK/"', 'finna.deduplication:"1"'))