Skip to content

Commit

Permalink
Merge branch 'get-most-comments' into master, add new function as int…
Browse files Browse the repository at this point in the history
…ermediate fix for `get_all_comments()` that doesn't seem to be working properly (see gojiplus#52)
  • Loading branch information
balthasars committed Apr 20, 2021
2 parents 20f9eb5 + 8acb717 commit 77092a8
Show file tree
Hide file tree
Showing 7 changed files with 244 additions and 12 deletions.
4 changes: 3 additions & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,9 @@ Imports:
magrittr,
tidyr,
tidyselect,
tibble
tibble,
stringr,
rlang
VignetteBuilder: knitr
Suggests:
knitr (>= 1.11),
Expand Down
8 changes: 8 additions & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ export(get_captions)
export(get_channel_stats)
export(get_comment_threads)
export(get_comments)
export(get_most_comments)
export(get_playlist_items)
export(get_playlists)
export(get_related_videos)
Expand Down Expand Up @@ -44,7 +45,10 @@ importFrom(dplyr,bind_rows)
importFrom(dplyr,filter)
importFrom(dplyr,mutate)
importFrom(dplyr,pull)
importFrom(dplyr,rename)
importFrom(dplyr,rename_at)
importFrom(dplyr,select)
importFrom(dplyr,vars)
importFrom(httr,DELETE)
importFrom(httr,GET)
importFrom(httr,POST)
Expand All @@ -61,11 +65,15 @@ importFrom(magrittr,"%>%")
importFrom(plyr,ldply)
importFrom(purrr,map_dbl)
importFrom(purrr,map_df)
importFrom(rlang,.data)
importFrom(stringr,str_remove)
importFrom(tibble,enframe)
importFrom(tidyr,pivot_wider)
importFrom(tidyr,unnest)
importFrom(tidyr,unnest_longer)
importFrom(tidyr,unnest_wider)
importFrom(tidyselect,all_of)
importFrom(tidyselect,everything)
importFrom(tidyselect,starts_with)
importFrom(utils,browseURL)
importFrom(utils,read.table)
189 changes: 189 additions & 0 deletions R/get_most_comments.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
# helpers

# Flatten the top-level comment of each thread in an already-parsed
# commentThreads response.
#
# `res` is the data frame produced by `parse_snippet()`. The nested
# `topLevelComment*` and `pageInfo` columns are widened one level at a time
# (each step is skipped by `conditional_unnest_wider()` when it decides the
# column is not applicable), the thread-level `id` column is dropped, and the
# `topLevelComment_` prefix is stripped so the column names line up with the
# ones produced for replies. Every row is flagged `is_reply = FALSE`.
parse_comment_thread <- function(res) {
  widened <- conditional_unnest_wider(res, var = "topLevelComment")
  widened <- conditional_unnest_wider(widened, var = "topLevelComment_snippet")
  widened <- conditional_unnest_wider(
    widened,
    var = "topLevelComment_snippet_authorChannelId"
  )
  widened <- conditional_unnest_wider(widened, var = "pageInfo")

  without_id <- dplyr::select(widened, -id)

  # strip the prefix so these columns can later be bound with reply columns
  renamed <- dplyr::rename_at(
    without_id,
    dplyr::vars(tidyselect::starts_with("topLevelComment_")),
    function(col_name) stringr::str_remove(col_name, "topLevelComment_")
  )

  dplyr::mutate(renamed, is_reply = FALSE)
}

# Extract the replies embedded in a parsed comment thread.
#
# `comment_thread` is the data frame returned by `parse_comment_thread()`; it
# must contain the columns `replies` and `totalReplyCount`. Threads without
# replies are dropped, the remaining `comments` entries are unnested to one
# row per reply, and the `comments_` prefix is stripped so the column names
# match those of the top-level comments. Every returned row is flagged
# `is_reply = TRUE`.
#
# Returns a data frame with one row per reply. When no thread on the page has
# replies, the zero-row data frame is returned as is, so callers can still
# hand it to `dplyr::bind_rows()`.
parse_replies <- function(comment_thread) {
  replies <- comment_thread %>%
    dplyr::select(replies, totalReplyCount) %>%
    tidyr::unnest_wider(replies) %>%
    dplyr::filter(totalReplyCount > 0)

  # Nothing to unnest: the original guard (`>= 0`) was always TRUE and went on
  # to unnest a `comments` column that need not exist on a reply-free page.
  if (nrow(replies) == 0) {
    return(replies)
  }

  replies %>%
    tidyr::unnest(cols = comments) %>%
    conditional_unnest_wider("comments") %>%
    # rename to make compatible with other comments
    dplyr::rename_at(
      dplyr::vars(tidyselect::starts_with("comments_")),
      ~ stringr::str_remove(.x, "comments_")
    ) %>%
    conditional_unnest_wider("snippet") %>%
    conditional_unnest_wider("snippet_authorChannelId") %>%
    dplyr::mutate(is_reply = TRUE)
}

# Fetch ONE page of the `commentThreads` endpoint and return it as a single
# data frame of top-level comments followed by their replies.
#
# Arguments:
#   filter       Named vector of length 1. The name must be one of
#                `video_id`, `channel_id`, `thread_id` or
#                `threads_related_to_channel`; the value is the matching ID.
#   page_token   Token of the page to request, or NULL for the first page.
#   part         Value of the `part` query parameter. The parsers used below
#                drop an `id` column and read a `replies` column, so the
#                default requests all three parts.
#   text_format  Either "html" or "plainText".
#   max_results  Requested page size; values below 20 are rejected and values
#                above 100 are sent as 100.
#
# Returns the bound data frame for this page only; following `nextPageToken`
# is left to the caller.
get_parse_bind_comments <- function(filter = NULL, page_token = NULL,
                                    part = "snippet,replies,id",
                                    text_format = "html",
                                    max_results = 100) {
  if (max_results < 20) {
    stop(paste0(
      "max_results only takes a value over 20.\n",
      "Above 100, it outputs all the results."
    ))
  }

  if (length(text_format) != 1 || !(text_format %in% c("html", "plainText"))) {
    stop("Provide a legitimate value of textFormat.")
  }

  # Check the length first: with `filter = NULL` (or several entries) the name
  # test below would not yield a single TRUE/FALSE for `if`.
  if (length(filter) != 1) {
    stop("filter must be a vector of length 1.")
  }

  # filter names accepted by this function -> query parameter names
  translate_filter <- c(
    video_id = "videoId",
    thread_id = "id",
    threads_related_to_channel = "allThreadsRelatedToChannelId",
    channel_id = "channelId"
  )

  if (is.null(names(filter)) ||
    !(names(filter) %in% names(translate_filter))) {
    stop(paste0(
      "filter can only take one of values: channel_id, video_id, thread_id,\n",
      "threads_related_to_channel."
    ))
  }

  names(filter) <- unname(translate_filter[names(filter)])

  querylist <- c(
    list(
      part = part,
      maxResults = min(max_results, 100),
      textFormat = text_format,
      pageToken = page_token
    ),
    filter
  )

  # request the page and parse it into top-level comments and replies
  res <- tuber_GET("commentThreads", querylist)
  snippet <- parse_snippet(res)
  comment_thread <- parse_comment_thread(snippet)
  reply_rows <- parse_replies(comment_thread)

  # columns that exist only for top-level comments and would therefore be NA
  # on the reply rows after binding
  na_cols_1 <- setdiff(
    colnames(comment_thread),
    colnames(reply_rows)
  )

  # NOTE(review): replies are appended after all top-level rows, so filling
  # "down" copies the values of the last top-level row into the reply rows
  # (and into any NA of an earlier top-level row). Confirm this is intended
  # for columns that are specific to a single thread.
  dplyr::bind_rows(comment_thread, reply_rows) %>%
    dplyr::select(-tidyselect::any_of("replies")) %>%
    # make columns complete if missing to avoid NAs
    tidyr::fill(tidyselect::any_of(na_cols_1), .direction = "down")
}


#' Get Most Comments
#'
#' Retrieves all top level comments and replies to them.
#' Replies to replies are not included.
#'
#' The comment threads of the video are requested page by page (100 threads
#' per request) until a page no longer carries a \code{nextPageToken}. Each
#' page is requested exactly once.
#'
#' @param video_id ID of video, required.
#' @return
#' Data frame with all comments and replies. Rows of later pages come before
#' rows of earlier pages; within a page, top level comments come before
#' replies. The column \code{is_reply} tells the two apart.
#'
#' @export get_most_comments
#'
#' @examples
#' \dontrun{
#'
#' # Set API token via yt_oauth() first
#'
#' get_most_comments(video_id = "Hop_MfkXl7c")
#' }
#'
#'
#'
get_most_comments <- function(video_id) {
  part_arg <- "snippet,replies,id"
  filter_arg <- c(video_id = video_id)
  text_format_arg <- "html"

  # Request and parse a single page of comment threads.
  fetch_page <- function(page_token) {
    get_parse_bind_comments(
      filter = filter_arg,
      page_token = page_token,
      part = part_arg,
      text_format = text_format_arg,
      max_results = 100
    )
  }

  # Continuation token carried by one parsed page, or NULL when the page has
  # none. Checking for the column avoids the unknown-column warning that the
  # previous code had to suppress.
  next_token_of <- function(page) {
    if (!("nextPageToken" %in% colnames(page))) {
      return(NULL)
    }
    token <- unique(unlist(page[["nextPageToken"]]))
    token <- token[!is.na(token)]
    if (length(token) == 0) {
      return(NULL)
    }
    token[[1]]
  }

  page <- fetch_page(NULL)
  pages <- list(page)
  next_page_token <- next_token_of(page)

  # Follow the token of the page that was just fetched. The previous loop read
  # the token from the accumulated data and always ran at least once, which
  # requested the first page a second time for single-page videos.
  while (!is.null(next_page_token)) {
    page <- fetch_page(next_page_token)
    pages[[length(pages) + 1L]] <- page
    next_page_token <- next_token_of(page)
  }

  # bind once; newest page first, matching the previous row order
  dplyr::bind_rows(rev(pages))
}

# tuber::yt_oauth(
# app_id = Sys.getenv("YOUTUBE_API_APP_ID"),
# app_secret = Sys.getenv("YOUTUBE_API_CLIENT_SECRET")
# )
# get_most_comments(video_id = "Hop_MfkXl7c")
15 changes: 7 additions & 8 deletions R/get_video_details.R
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,8 @@ conditional_unnest_wider <- function(data_input, var) {
}
}


json_to_df <- function(res) {
intermediate <- res %>%
parse_snippet <- function(res){
res %>%
tibble::enframe() %>%
tidyr::pivot_wider() %>%
tidyr::unnest(cols = c(kind, etag)) %>%
Expand All @@ -20,8 +19,10 @@ json_to_df <- function(res) {
# reflect level of nesting in column name for those that may not be unique
dplyr::rename(items_kind = kind, items_etag = etag) %>%
tidyr::unnest_wider(snippet)
}

intermediate_2 <- intermediate %>%
parse_video_details <- function(res) {
res %>%
# fields that may not be available:
# live streaming details
conditional_unnest_wider(var = "liveStreamingDetails") %>%
Expand All @@ -46,9 +47,6 @@ json_to_df <- function(res) {
conditional_unnest_wider(var = "thumbnails_medium") %>%
conditional_unnest_wider(var = "thumbnails_high") %>%
conditional_unnest_wider(var = "thumbnails_maxres")


intermediate_2
}

#' Get Details of a Video or Videos
Expand Down Expand Up @@ -124,7 +122,8 @@ get_video_details <- function(video_id = NULL, part = "snippet", as.data.frame =
}

if (as.data.frame) {
raw_res <- json_to_df(raw_res)
snippet_df <- parse_snippet(raw_res)
raw_res <- parse_video_details(snippet_df)
}

raw_res
Expand Down
3 changes: 3 additions & 0 deletions R/globals.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
# Column names that are referenced through tidyverse non-standard evaluation.
# Declaring them here silences the "no visible binding for global variable"
# note; background:
# https://community.rstudio.com/t/how-to-solve-no-visible-binding-for-global-variable-note/28887
utils::globalVariables(
  c(
    "totalReplyCount",
    "id",
    "comments",
    "kind",
    "etag",
    "items",
    "snippet",
    "video_id_arg"
  )
)
8 changes: 5 additions & 3 deletions R/tuber.R
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,12 @@
#' @importFrom httr upload_file content oauth_endpoints oauth_app oauth2.0_token
#' @importFrom utils read.table
#' @importFrom plyr ldply
#' @importFrom dplyr bind_rows select pull filter mutate
#' @importFrom dplyr bind_rows select pull filter mutate vars rename_at rename
#' @importFrom rlang .data
#' @importFrom stringr str_remove
#' @importFrom tibble enframe
#' @importFrom tidyselect everything all_of
#' @importFrom tidyr pivot_wider unnest unnest_longer
#' @importFrom tidyselect everything all_of starts_with
#' @importFrom tidyr pivot_wider unnest unnest_longer unnest_wider
#' @importFrom purrr map_df map_dbl
#' @docType package
NULL
Expand Down
29 changes: 29 additions & 0 deletions man/get_most_comments.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 77092a8

Please sign in to comment.