Skip to content

Commit

Permalink
new updates and changes
Browse files Browse the repository at this point in the history
  • Loading branch information
ake123 committed Jan 15, 2025
1 parent 8db405f commit 7398d3f
Show file tree
Hide file tree
Showing 22 changed files with 177 additions and 1,060 deletions.
18 changes: 1 addition & 17 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -17,25 +17,9 @@ export(save_for_offline)
export(search_finna)
export(search_finna_from_file)
export(search_publisher)
export(visualize_author_distribution)
export(visualize_format_distribution)
export(visualize_format_distribution_pie)
export(visualize_format_library_correlation)
export(visualize_library_distribution)
export(visualize_subject_distribution)
export(visualize_title_year_heatmap)
export(visualize_top_20_titles)
export(visualize_word_cloud)
export(visualize_year_distribution)
export(visualize_year_distribution_line)
export(top_plot)
import(dplyr)
import(ggplot2)
import(progress)
import(rlang)
import(stringr)
import(tidyr)
import(tm)
import(wordcloud2)
importFrom(curl,curl_download)
importFrom(dplyr,arrange)
importFrom(dplyr,bind_rows)
Expand Down
2 changes: 1 addition & 1 deletion R/search_finna.R
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#' \item "title,id asc" (Title)
#' }
#' @param limit An integer specifying the total number of records to return across multiple pages.
#' @param lng A string for the language of returned translated strings. Options are "fi", "en-gb", "sv", "se". Defaults to "fi".
#' @param lng A string for the language of returned translated strings. Options are "fi" - Finnish, "en-gb" - English, "sv" - Swedish, "se" - Sami. Defaults to "fi" - Finnish.
#' @param prettyPrint A logical value indicating whether to pretty-print the JSON response. Useful for debugging. Defaults to FALSE.
#' @return A tibble containing the search results with relevant fields extracted and provenance information.
#' @examples
Expand Down
103 changes: 103 additions & 0 deletions R/top_plot.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
#' @title Plot Top Entries
#' @description Visualizes the top entries for a given field in a data frame
#' as a horizontal bar chart. Count and percentage statistics are also shown
#' as needed.
#' @param x Data frame, vector or factor
#' @param field Field to show. Required when \code{x} is a data frame; also
#'   used in the label of the count axis.
#' @param ntop Number of top entries to show. All entries are shown if NULL.
#' @param highlight Entries from the 'field' to be highlighted. Highlighted
#'   bars are filled in red and the remaining bars in dark gray.
#' @param max.char Max number of characters in strings. Longer strings will be cut and only max.char first characters are shown. No cutting by default
#' @param show.rest Show the count of leave-out samples (not in top-N) as an additional bar.
#' @param show.percentage Show the proportion of each category with respect to the total sample count.
#' @param log10 Show the counts on log10 scale (default FALSE)
#' @return ggplot object. An empty ggplot object is returned when there is
#'   nothing to plot.
#' @export
#' @author Leo Lahti \email{leo.lahti@@iki.fi}
#' @references See citation("bibliographica")
#' @examples \dontrun{p <- top_plot(x, field, 50)}
#' @keywords utilities
top_plot <- function (x, field = NULL, ntop = NULL, highlight = NULL, max.char = Inf, show.rest = FALSE, show.percentage = FALSE, log10 = FALSE) {

  # Circumvent warnings in build
  color <- percentage <- NULL

  if (is.data.frame(x)) {
    if (is.null(field)) {
      stop("'field' must be specified when 'x' is a data frame.",
           call. = FALSE)
    }
    x <- x[[field]]
  }

  if (is.factor(x) || is.character(x) || is.numeric(x)) {
    x <- droplevels(as.factor(x))
  }

  if (length(x) == 0) {
    return(ggplot())
  }

  # Frequency table in decreasing order; empty categories are dropped
  tab <- rev(sort(table(x)))
  tab <- tab[tab > 0]

  # Nothing to count (e.g. all values are NA)
  if (length(tab) == 0) {
    return(ggplot())
  }

  total <- sum(tab)
  dfs <- data.frame(names = names(tab), count = as.numeric(tab))

  # Show all cases if ntop not specified
  if (is.null(ntop)) {
    ntop <- nrow(dfs)
  }
  ntop <- min(ntop, nrow(dfs))

  # Pick top-n items; seq_len() is safe also when ntop is 0
  dfs <- dfs[seq_len(ntop), ]
  topp <- sum(dfs$count) / total

  # Optionally collect everything outside the top-n into a single bar
  if (show.rest && ntop < length(tab)) {
    dfs2 <- data.frame(list(names = "Other", count = total - sum(dfs$count)))
    dfs <- bind_rows(dfs, dfs2)
  }

  if (nrow(dfs) == 0) {
    return(ggplot())
  }

  # Limit length of names in the printout.
  # substr() cannot take Inf, hence fall back to the longest name.
  if (is.infinite(max.char)) {
    max.char <- max(nchar(as.character(dfs$names)))
  }

  levels1 <- length(unique(dfs$names))
  dfs$names <- substr(as.character(dfs$names), 1, max.char)
  levels2 <- length(unique(dfs$names))
  if (levels1 != levels2) {
    warning("Truncating the names is mixing up some of the variable names.")
  }

  # Arrange levels; leave the leaveout category as the last one
  # (first factor level ends up at the bottom after coord_flip)
  levs <- rev(unique(dfs$names))
  if ("Other" %in% levs) {
    levs <- c("Other", setdiff(levs, "Other"))
  }
  dfs$names <- droplevels(factor(dfs$names, levels = levs))

  # Percentages are relative to the total sample count, also when the
  # leave-out category is not shown
  dfs$percentage <- round(100 * dfs$count / total, 1)

  if (!is.null(highlight)) {
    dfs$color <- rep("darkgray", nrow(dfs))
    dfs$color[dfs$names %in% highlight] <- "red"
    # The fill must come from the aesthetic mapping; a constant fill in
    # geom_bar() would override it and hide the highlighting.
    p <- ggplot(dfs, aes(x = names, y = count, fill = color))
    p <- p + geom_bar(stat = "identity", color = "black")
    p <- p + scale_fill_identity()
  } else {
    dfs$color <- rep("black", nrow(dfs))
    p <- ggplot(dfs, aes(x = names, y = count))
    p <- p + geom_bar(stat = "identity", color = "black", fill = "white")
  }

  p <- p + coord_flip()
  p <- p + xlab("") + ylab(paste(field, "(N)"))

  s <- paste0("Total N=", total, " / Top-", ntop, ": ", round(100 * topp, 1), "%")

  p <- p + labs(title = s)

  if (show.percentage) {
    # NOTE(review): the labels are drawn at a fixed count of 30 regardless
    # of the data range - confirm that this position is intended.
    p <- p + geom_text(aes(x = names, y = 30,
                           label = paste0(percentage, "%")))
  }

  if (log10) {
    p <- p + scale_y_log10() + labs(y = paste(field, "(N)"))
  }

  p

}
Loading

0 comments on commit 7398d3f

Please sign in to comment.