From 1d672b1070984e289c75932ab55474bfd4a85065 Mon Sep 17 00:00:00 2001
From: Balthasar <balthasar.sager@gmail.com>
Date: Mon, 19 Apr 2021 14:37:24 +0200
Subject: [PATCH 1/6] make code more modular for `get_video_details()` by
 dividing helpers up

---
 R/get_video_details.R | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/R/get_video_details.R b/R/get_video_details.R
index c9811ca..5f508c0 100644
--- a/R/get_video_details.R
+++ b/R/get_video_details.R
@@ -7,9 +7,8 @@ conditional_unnest_wider <- function(data_input, var) {
   }
 }
 
-
-json_to_df <- function(res) {
-  intermediate <- res %>%
+parse_snippet <- function(res){
+  res %>%
     tibble::enframe() %>%
     tidyr::pivot_wider() %>%
     tidyr::unnest(cols = c(kind, etag)) %>%
@@ -20,8 +19,10 @@ json_to_df <- function(res) {
     # reflect level of nesting in column name for those that may not be unique
     dplyr::rename(items_kind = kind, items_etag = etag) %>%
     tidyr::unnest_wider(snippet)
+}
 
-  intermediate_2 <- intermediate %>%
+parse_video_details <- function(res) {
+  res %>%
     # fields that may not be available:
     # live streaming details
     conditional_unnest_wider(var = "liveStreamingDetails") %>%
@@ -46,9 +47,6 @@ json_to_df <- function(res) {
     conditional_unnest_wider(var = "thumbnails_medium") %>%
     conditional_unnest_wider(var = "thumbnails_high") %>%
     conditional_unnest_wider(var = "thumbnails_maxres")
-
-
-  intermediate_2
 }
 
 #' Get Details of a Video or Videos
@@ -124,7 +122,8 @@ get_video_details <- function(video_id = NULL, part = "snippet", as.data.frame =
   }
 
   if (as.data.frame) {
-    raw_res <- json_to_df(raw_res)
+    snippet_df <- parse_snippet(raw_res)
+    raw_res <- parse_video_details(snippet_df)
   }
 
   raw_res

From a0cfbb77b84e423ef757c73d06ff55d3d97e9050 Mon Sep 17 00:00:00 2001
From: Balthasar <balthasar.sager@gmail.com>
Date: Mon, 19 Apr 2021 20:42:55 +0200
Subject: [PATCH 2/6] get_most_comments

---
 DESCRIPTION              |   4 +-
 NAMESPACE                |   8 ++
 R/get_most_comments.R    | 179 +++++++++++++++++++++++++++++++++++++++
 R/globals.R              |   3 +
 R/tuber.R                |   8 +-
 man/get_most_comments.Rd |  69 +++++++++++++++
 6 files changed, 267 insertions(+), 4 deletions(-)
 create mode 100644 R/get_most_comments.R
 create mode 100644 R/globals.R
 create mode 100644 man/get_most_comments.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 036aaa1..47dcaca 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -26,7 +26,9 @@ Imports:
     magrittr,
     tidyr,
     tidyselect,
-    tibble
+    tibble,
+    stringr,
+    rlang
 VignetteBuilder: knitr
 Suggests:
     knitr (>= 1.11),
diff --git a/NAMESPACE b/NAMESPACE
index ce09053..ef74caf 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -13,6 +13,7 @@ export(get_captions)
 export(get_channel_stats)
 export(get_comment_threads)
 export(get_comments)
+export(get_most_comments)
 export(get_playlist_items)
 export(get_playlists)
 export(get_related_videos)
@@ -44,7 +45,10 @@ importFrom(dplyr,bind_rows)
 importFrom(dplyr,filter)
 importFrom(dplyr,mutate)
 importFrom(dplyr,pull)
+importFrom(dplyr,rename)
+importFrom(dplyr,rename_at)
 importFrom(dplyr,select)
+importFrom(dplyr,vars)
 importFrom(httr,DELETE)
 importFrom(httr,GET)
 importFrom(httr,POST)
@@ -61,11 +65,15 @@ importFrom(magrittr,"%>%")
 importFrom(plyr,ldply)
 importFrom(purrr,map_dbl)
 importFrom(purrr,map_df)
+importFrom(rlang,.data)
+importFrom(stringr,str_remove)
 importFrom(tibble,enframe)
 importFrom(tidyr,pivot_wider)
 importFrom(tidyr,unnest)
 importFrom(tidyr,unnest_longer)
+importFrom(tidyr,unnest_wider)
 importFrom(tidyselect,all_of)
 importFrom(tidyselect,everything)
+importFrom(tidyselect,starts_with)
 importFrom(utils,browseURL)
 importFrom(utils,read.table)
diff --git a/R/get_most_comments.R b/R/get_most_comments.R
new file mode 100644
index 0000000..7972228
--- /dev/null
+++ b/R/get_most_comments.R
@@ -0,0 +1,179 @@
+# helpers
+
+parse_comment_thread <- function(res) {
+  res %>%
+    # fields that may not be available:
+    # the top-level comment, its snippet and the author's channel id
+    conditional_unnest_wider(var = "topLevelComment") %>%
+    conditional_unnest_wider(var = "topLevelComment_snippet") %>%
+    conditional_unnest_wider(var = "topLevelComment_snippet_authorChannelId") %>%
+    dplyr::select(-c(id)) %>%
+    # rename to make compatible with other comments later
+    dplyr::rename_at(
+      dplyr::vars(tidyselect::starts_with("topLevelComment_")),
+      ~stringr::str_remove(.x, "topLevelComment_")
+    ) %>%
+    dplyr::mutate(is_reply = FALSE)
+}
+
+parse_replies <- function(comment_thread) {
+  replies <- comment_thread %>%
+    dplyr::select(replies, totalReplyCount) %>%
+    tidyr::unnest_wider(replies) %>%
+    dplyr::filter(totalReplyCount > 0)
+
+  if (nrow(replies) >= 0) {
+    replies <- replies %>%
+      tidyr::unnest(comments) %>%
+      conditional_unnest_wider("comments") %>%
+      # rename to make compatible with other comments
+      dplyr::rename_at(
+        dplyr::vars(tidyselect::starts_with("comments_")),
+        ~ stringr::str_remove(.x, "comments_")
+      ) %>%
+      conditional_unnest_wider("snippet") %>%
+      conditional_unnest_wider("snippet_authorChannelId") %>%
+      dplyr::mutate(is_reply = TRUE)
+  }
+}
+
+#' Get Most Comments
+#'
+#' @param filter string; Required.
+#' named vector of length 1
+#' potential names of the entry in the vector:
+#' \code{video_id}: video ID.
+#' \code{channel_id}: channel ID.
+#' \code{thread_id}: comma-separated list of comment thread IDs
+#' \code{threads_related_to_channel}: channel ID.
+#'
+#' @param part  Comment resource requested. Required. Comma separated list
+#' of one or more of the
+#' following: \code{id, snippet}. e.g., \code{"id, snippet"},
+#' \code{"id"}, etc. Default: \code{snippet}.
+#' @param max_results  Maximum number of items that should be returned.
+#'  Integer. Optional. Default is 100.
+#' If the value is greater than 100 then the function fetches all the
+#' results. The outcome is a simplified \code{data.frame}.
+#' @param page_token  Specific page in the result set that should be
+#' returned. Optional.
+#' @param text_format Data Type: Character. Default is \code{"html"}.
+#' Only takes \code{"html"} or \code{"plainText"}. Optional.
+#' @param \dots Additional arguments passed to \code{\link{tuber_GET}}.
+#'
+#' @return
+#' Nested named list. The entry \code{items} is a list of comments
+#' along with meta information.
+#' Within each of the \code{items} is an item \code{snippet} which
+#' has an item \code{topLevelComment$snippet$textDisplay}
+#' that contains the actual comment.
+#'
+#' If simplify is \code{TRUE}, a \code{data.frame} with the following columns:
+#' \code{authorDisplayName, authorProfileImageUrl, authorChannelUrl,
+#' authorChannelId.value, videoId, textDisplay,
+#' canRate, viewerRating, likeCount, publishedAt, updatedAt}
+#'
+#' @export get_most_comments
+#'
+#' @references \url{https://developers.google.com/youtube/v3/docs/commentThreads/list}
+#'
+#' @examples
+#' \dontrun{
+#'
+#' # Set API token via yt_oauth() first
+#'
+#' get_most_comments(filter = c(video_id = "N708P-A45D0"))
+#' get_most_comments(filter = c(video_id = "N708P-A45D0"), max_results = 101)
+#' }
+get_most_comments <- function(filter = NULL, part = "snippet,replies",
+                              text_format = "html", max_results = 101, page_token = NULL, ...) {
+  if (max_results < 20) {
+    stop("max_results only takes a value over 20.
+          Above 100, it outputs all the results.")
+  }
+
+  if (text_format != "html" & text_format != "plainText") {
+    stop("Provide a legitimate value of textFormat.")
+  }
+
+  if (!(names(filter) %in%
+    c("video_id", "channel_id", "thread_id", "threads_related_to_channel"))) {
+    stop("filter can only take one of values: channel_id, video_id, parent_id,
+      threads_related_to_channel.")
+  }
+
+  if (length(filter) != 1) stop("filter must be a vector of length 1.")
+
+  orig_filter <- filter
+  translate_filter <- c(
+    video_id = "videoId", thread_id = "id",
+    threads_related_to_channel = "allThreadsRelatedToChannelId",
+    channel_id = "channelId", page_token = "pageToken"
+  )
+
+  yt_filter_name <- as.vector(translate_filter[match(
+    names(filter),
+    names(translate_filter)
+  )])
+  names(filter) <- yt_filter_name
+
+  querylist <- list(
+    part = part, maxResults =
+      ifelse(max_results > 100, 100, max_results),
+    textFormat = text_format
+  )
+  querylist <- c(querylist, filter)
+
+  ## get first page of results of a comment thread and
+  ## initialize objects with content of first page before
+  ## proceeding to next pages of API response
+  res <- tuber_GET("commentThreads", querylist, ...)
+  # parse results
+  snippet <- parse_snippet(res)
+  comment_thread <- parse_comment_thread(snippet)
+  replies <- parse_replies(comment_thread)
+  # get columns names of columns that will be NA upon binding the two dataframes
+  na_cols_1 <- setdiff(
+    colnames(comment_thread),
+    colnames(replies)
+  )
+  # setdiff(
+  #   colnames(replies),
+  #   colnames(comment_thread)
+  # )
+
+  comments <- dplyr::bind_rows(
+    comment_thread, replies
+  ) %>%
+    dplyr::select(-c(replies)) %>%
+    dplyr::filter(totalReplyCount > 1) %>%
+    # make columns complete if missing to avoid NAs
+    tidyr::fill(tidyselect::any_of(na_cols_1), .direction = "down")
+
+  # get all following pages of comment thread
+  agg_res <- comments
+
+    #   # shouldn't this be `unique()`?
+  next_page_token <- res$nextPageToken
+  print("erstes Mal")
+  print(next_page_token)
+
+  while (!is.null(next_page_token)) {
+      print("zweites Mal")
+      print(next_page_token)
+      next_results <- get_most_comments(orig_filter,
+      part = part,
+      text_format = text_format,
+      simplify = FALSE,
+      max_results = 101,
+      page_token = next_page_token
+    )
+      agg_res <- rbind(next_results, agg_res)
+      # get token with link to next result page
+      next_page_token <- next_results$nextPageToken
+  print(next_page_token)
+  }
+  print("finished")
+  return(agg_res)
+}
+
diff --git a/R/globals.R b/R/globals.R
new file mode 100644
index 0000000..100c9e0
--- /dev/null
+++ b/R/globals.R
@@ -0,0 +1,3 @@
+# fix warning about no visible bindings due to tidyverse functions
+# https://community.rstudio.com/t/how-to-solve-no-visible-binding-for-global-variable-note/28887
+utils::globalVariables(c("totalReplyCount", "id", "comments", "kind", "etag", "items", "snippet"))
diff --git a/R/tuber.R b/R/tuber.R
index cba0a64..00ebb27 100644
--- a/R/tuber.R
+++ b/R/tuber.R
@@ -8,10 +8,12 @@
 #' @importFrom httr upload_file content oauth_endpoints oauth_app oauth2.0_token
 #' @importFrom utils read.table
 #' @importFrom plyr ldply
-#' @importFrom dplyr bind_rows select pull filter mutate
+#' @importFrom dplyr bind_rows select pull filter mutate vars rename_at rename
+#' @importFrom rlang .data
+#' @importFrom stringr str_remove
 #' @importFrom tibble enframe
-#' @importFrom tidyselect everything all_of
-#' @importFrom tidyr pivot_wider unnest unnest_longer
+#' @importFrom tidyselect everything all_of starts_with
+#' @importFrom tidyr pivot_wider unnest unnest_longer unnest_wider
 #' @importFrom purrr map_df map_dbl
 #' @docType package
 NULL
diff --git a/man/get_most_comments.Rd b/man/get_most_comments.Rd
new file mode 100644
index 0000000..48cd150
--- /dev/null
+++ b/man/get_most_comments.Rd
@@ -0,0 +1,69 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/get_most_comments.R
+\name{get_most_comments}
+\alias{get_most_comments}
+\title{Get Most Comments}
+\usage{
+get_most_comments(
+  filter = NULL,
+  part = "snippet,replies",
+  text_format = "html",
+  max_results = 101,
+  page_token = NULL,
+  ...
+)
+}
+\arguments{
+\item{filter}{string; Required.
+named vector of length 1
+potential names of the entry in the vector:
+\code{video_id}: video ID.
+\code{channel_id}: channel ID.
+\code{thread_id}: comma-separated list of comment thread IDs
+\code{threads_related_to_channel}: channel ID.}
+
+\item{part}{Comment resource requested. Required. Comma separated list
+of one or more of the
+following: \code{id, snippet}. e.g., \code{"id, snippet"},
+\code{"id"}, etc. Default: \code{snippet}.}
+
+\item{text_format}{Data Type: Character. Default is \code{"html"}.
+Only takes \code{"html"} or \code{"plainText"}. Optional.}
+
+\item{max_results}{Maximum number of items that should be returned.
+ Integer. Optional. Default is 100.
+If the value is greater than 100 then the function fetches all the
+results. The outcome is a simplified \code{data.frame}.}
+
+\item{page_token}{Specific page in the result set that should be
+returned. Optional.}
+
+\item{\dots}{Additional arguments passed to \code{\link{tuber_GET}}.}
+}
+\value{
+Nested named list. The entry \code{items} is a list of comments
+along with meta information.
+Within each of the \code{items} is an item \code{snippet} which
+has an item \code{topLevelComment$snippet$textDisplay}
+that contains the actual comment.
+
+If simplify is \code{TRUE}, a \code{data.frame} with the following columns:
+\code{authorDisplayName, authorProfileImageUrl, authorChannelUrl,
+authorChannelId.value, videoId, textDisplay,
+canRate, viewerRating, likeCount, publishedAt, updatedAt}
+}
+\description{
+Get Most Comments
+}
+\examples{
+\dontrun{
+
+# Set API token via yt_oauth() first
+
+get_most_comments(filter = c(video_id = "N708P-A45D0"))
+get_most_comments(filter = c(video_id = "N708P-A45D0"), max_results = 101)
+}
+}
+\references{
+\url{https://developers.google.com/youtube/v3/docs/commentThreads/list}
+}

From b4c81de4d7576f951573568385c036c4f62c5cc5 Mon Sep 17 00:00:00 2001
From: Balthasar <balthasar.sager@gmail.com>
Date: Tue, 20 Apr 2021 00:39:59 +0200
Subject: [PATCH 3/6] working prototype do not touch :)

---
 R/get_most_comments.R | 93 +++++++++++++++++++++++++++++++------------
 1 file changed, 67 insertions(+), 26 deletions(-)

diff --git a/R/get_most_comments.R b/R/get_most_comments.R
index 7972228..295f4e6 100644
--- a/R/get_most_comments.R
+++ b/R/get_most_comments.R
@@ -85,11 +85,14 @@ parse_replies <- function(comment_thread) {
 #' get_most_comments(filter = c(video_id = "N708P-A45D0"))
 #' get_most_comments(filter = c(video_id = "N708P-A45D0"), max_results = 101)
 #' }
-get_most_comments <- function(filter = NULL, part = "snippet,replies",
-                              text_format = "html", max_results = 101, page_token = NULL, ...) {
+#'
+#'
+
+get_most_comments <- function(filter = NULL, part = "snippet,replies,id",
+                              text_format = "html", max_results = 100, page_token = NULL, ...) {
   if (max_results < 20) {
     stop("max_results only takes a value over 20.
-          Above 100, it outputs all the results.")
+            Above 100, it outputs all the results.")
   }
 
   if (text_format != "html" & text_format != "plainText") {
@@ -99,7 +102,7 @@ get_most_comments <- function(filter = NULL, part = "snippet,replies",
   if (!(names(filter) %in%
     c("video_id", "channel_id", "thread_id", "threads_related_to_channel"))) {
     stop("filter can only take one of values: channel_id, video_id, parent_id,
-      threads_related_to_channel.")
+        threads_related_to_channel.")
   }
 
   if (length(filter) != 1) stop("filter must be a vector of length 1.")
@@ -120,14 +123,17 @@ get_most_comments <- function(filter = NULL, part = "snippet,replies",
   querylist <- list(
     part = part, maxResults =
       ifelse(max_results > 100, 100, max_results),
-    textFormat = text_format
+    textFormat = text_format,
+    pageToken = page_token
   )
+
   querylist <- c(querylist, filter)
+  print(querylist)
 
   ## get first page of results of a comment thread and
   ## initialize objects with content of first page before
   ## proceeding to next pages of API response
-  res <- tuber_GET("commentThreads", querylist, ...)
+  res <- tuber:::tuber_GET("commentThreads", querylist, ...)
   # parse results
   snippet <- parse_snippet(res)
   comment_thread <- parse_comment_thread(snippet)
@@ -146,34 +152,69 @@ get_most_comments <- function(filter = NULL, part = "snippet,replies",
     comment_thread, replies
   ) %>%
     dplyr::select(-c(replies)) %>%
-    dplyr::filter(totalReplyCount > 1) %>%
+    # dplyr::filter(totalReplyCount > 1) %>%
     # make columns complete if missing to avoid NAs
     tidyr::fill(tidyselect::any_of(na_cols_1), .direction = "down")
 
   # get all following pages of comment thread
-  agg_res <- comments
+  # agg_res <- comments
 
-    #   # shouldn't this be `unique()`?
-  next_page_token <- res$nextPageToken
+  #   # shouldn't this be `unique()`?
+  # next_page_token <- unique(res$nextPageToken)
   print("erstes Mal")
   print(next_page_token)
 
-  while (!is.null(next_page_token)) {
-      print("zweites Mal")
-      print(next_page_token)
-      next_results <- get_most_comments(orig_filter,
-      part = part,
-      text_format = text_format,
-      simplify = FALSE,
-      max_results = 101,
-      page_token = next_page_token
-    )
-      agg_res <- rbind(next_results, agg_res)
-      # get token with link to next result page
-      next_page_token <- next_results$nextPageToken
-  print(next_page_token)
-  }
   print("finished")
-  return(agg_res)
+
+  comments
+}
+
+library(magrittr)
+tuber::yt_oauth(
+  app_id = Sys.getenv("YOUTUBE_API_APP_ID"),
+  app_secret = Sys.getenv("YOUTUBE_API_CLIENT_SECRET")
+)
+
+all_data <- get_most_comments(filter = c(video_id = "Hop_MfkXl7c"), page_token = NULL)
+counter_while <- 0
+# next_page_token <- NULL
+next_page_token <- unique(all_data$nextPageToken)
+
+while (counter_while == 0 | !is.null(next_page_token)) {
+  next_data <- get_most_comments(
+    filter = c(video_id = "Hop_MfkXl7c"),
+    page_token = next_page_token
+  )
+  next_page_token <- unique(next_data$nextPageToken)
+  print(next_data)
+  counter_while <- counter_while + 1
+  message(paste(counter_while, ":", "counter_while"))
+  message(paste(next_page_token, ":", "next_page_token"))
+  all_data <- dplyr::bind_rows(next_data, all_data)
+}
+return(all_data)
+
+retrieve_data_from_paginated_api <- function(video_id){
+
 }
 
+# example <- get_most_comments(filter = c(video_id = "Hop_MfkXl7c"), page_token = NULL)
+# example %>%
+#   tidyr::unnest(nextPageToken)
+#
+# get_most_comments(filter = c(video_id = "Hop_MfkXl7c"), page_token = "QURTSl9pMjVDT2V3WGF5Nm5ha3ZYam1HbWVPMFVybTJuWk96R2UyOTZwNnVwSXFJdTJfUFVpSVI3VUxqbU1TSGpKWVpCcFpITEl4cm83dw==")
+# tuber::get_comment_threads(filter = c(video_id = "Hop_MfkXl7c")) %>%
+#   tibble::as_tibble()
+#
+# res_2 <- get_most_comments(filter = c(video_id = "Hop_MfkXl7c"))
+#
+# res_2$nextPageToken
+#
+# t1 <- "QURTSl9pMnM3TFpxY2FQUTVDNGVfTTBDRUF0Nm52R0RXNGRuM1R3a21fMDZhemR0aUtDeHRTWnV3UXpmREs0cnI0TmYzOXh2VTlzRXdOYw=="
+# t2 <- "QURTSl9pMHdOWXBXWlRBakdlQnk1VXBUNEljSXM0QTU0WkNuOEp2VFBiX0RMQlhMeDVGdEo1UTlpWm5BaEFTRGZZZWU4UmV0LVFMTmFydw=="
+# t3 <- "QURTSl9pMlY5M1Q1VkxsZW9RbXR0VG1acXkzRjRMaDYxcEc4c05UenVUc0VIR0tXbmZmVHU2V3RVdC1WUmlHaW5wa19WUlJfS19vSGEzNA=="
+#
+# get_most_comments(filter = c(video_id = "Hop_MfkXl7c"), page_token = t1)
+# get_most_comments(filter = c(video_id = "Hop_MfkXl7c"), page_token = t2)
+# get_most_comments(filter = c(video_id = "Hop_MfkXl7c"), page_token = t3)
+#

From 31a1f417f9e1f7e82a8fc55ce59999fc193ba5da Mon Sep 17 00:00:00 2001
From: Balthasar <balthasar.sager@gmail.com>
Date: Tue, 20 Apr 2021 01:27:03 +0200
Subject: [PATCH 4/6] function from prototype

---
 R/get_most_comments.R | 33 +++++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/R/get_most_comments.R b/R/get_most_comments.R
index 295f4e6..23b52d2 100644
--- a/R/get_most_comments.R
+++ b/R/get_most_comments.R
@@ -194,10 +194,39 @@ while (counter_while == 0 | !is.null(next_page_token)) {
 }
 return(all_data)
 
-retrieve_data_from_paginated_api <- function(video_id){
-
+retrieve_data_from_paginated_api <- function(video_id_input) {
+
+  # initialize objects for loop
+  all_data <- get_most_comments(filter = c(video_id = video_id_input), page_token = NULL)
+  counter_while <- 0
+  next_page_token <- unique(all_data$nextPageToken)
+
+  # loop over results until last nextPageToken
+  while (counter_while == 0 | !is.null(next_page_token)) {
+    next_data <- get_most_comments(
+      filter = c(video_id = video_id_input),
+      page_token = next_page_token
+    )
+
+    # overwrite `next_page_token` that was initialized outside loop
+    # with new content that was just retrieved in the data
+    next_page_token <- unique(next_data$nextPageToken)
+    counter_while <- counter_while + 1
+
+    # overwrite `all_data` that was initialized outside loop
+    # using `all_data` from outside of loop in first iteration
+    # and then using itself from previous iteration plus
+    # new `next_data`.
+    all_data <- dplyr::bind_rows(next_data, all_data)
+  }
+  return(all_data)
 }
 
+# new_example_with_function <- retrieve_data_from_paginated_api(video_id_input = "Hop_MfkXl7c")
+# new_example_with_function %>%
+#   dplyr::filter(totalReplyCount > 1 & is_reply)
+# length(unique(new_example_with_function$snippet_textOriginal))
+
 # example <- get_most_comments(filter = c(video_id = "Hop_MfkXl7c"), page_token = NULL)
 # example %>%
 #   tidyr::unnest(nextPageToken)

From 1d820fd3bab5d321fcb2154af5edc8d9dcee5369 Mon Sep 17 00:00:00 2001
From: Balthasar <balthasar.sager@gmail.com>
Date: Tue, 20 Apr 2021 15:10:50 +0200
Subject: [PATCH 5/6] forgot to include pageInfo

---
 R/get_most_comments.R | 1 +
 1 file changed, 1 insertion(+)

diff --git a/R/get_most_comments.R b/R/get_most_comments.R
index 23b52d2..fbd260e 100644
--- a/R/get_most_comments.R
+++ b/R/get_most_comments.R
@@ -7,6 +7,7 @@ parse_comment_thread <- function(res) {
     conditional_unnest_wider(var = "topLevelComment") %>%
     conditional_unnest_wider(var = "topLevelComment_snippet") %>%
     conditional_unnest_wider(var = "topLevelComment_snippet_authorChannelId") %>%
+    conditional_unnest_wider(var = "pageInfo") %>%
     dplyr::select(-c(id)) %>%
     # rename to make compatible with other comments later
     dplyr::rename_at(

From 8acb717d39b39cbb7fd09482cc4e27f34a3b50f2 Mon Sep 17 00:00:00 2001
From: Balthasar <balthasar.sager@gmail.com>
Date: Tue, 20 Apr 2021 15:11:49 +0200
Subject: [PATCH 6/6] divide up into helpers and main functions, documentation,
 etc.

---
 R/get_most_comments.R    | 171 +++++++++++++--------------------------
 R/globals.R              |   2 +-
 man/get_most_comments.Rd |  58 +++----------
 3 files changed, 65 insertions(+), 166 deletions(-)

diff --git a/R/get_most_comments.R b/R/get_most_comments.R
index fbd260e..c308882 100644
--- a/R/get_most_comments.R
+++ b/R/get_most_comments.R
@@ -38,59 +38,8 @@ parse_replies <- function(comment_thread) {
   }
 }
 
-#' Get Most Comments
-#'
-#' @param filter string; Required.
-#' named vector of length 1
-#' potential names of the entry in the vector:
-#' \code{video_id}: video ID.
-#' \code{channel_id}: channel ID.
-#' \code{thread_id}: comma-separated list of comment thread IDs
-#' \code{threads_related_to_channel}: channel ID.
-#'
-#' @param part  Comment resource requested. Required. Comma separated list
-#' of one or more of the
-#' following: \code{id, snippet}. e.g., \code{"id, snippet"},
-#' \code{"id"}, etc. Default: \code{snippet}.
-#' @param max_results  Maximum number of items that should be returned.
-#'  Integer. Optional. Default is 100.
-#' If the value is greater than 100 then the function fetches all the
-#' results. The outcome is a simplified \code{data.frame}.
-#' @param page_token  Specific page in the result set that should be
-#' returned. Optional.
-#' @param text_format Data Type: Character. Default is \code{"html"}.
-#' Only takes \code{"html"} or \code{"plainText"}. Optional.
-#' @param \dots Additional arguments passed to \code{\link{tuber_GET}}.
-#'
-#' @return
-#' Nested named list. The entry \code{items} is a list of comments
-#' along with meta information.
-#' Within each of the \code{items} is an item \code{snippet} which
-#' has an item \code{topLevelComment$snippet$textDisplay}
-#' that contains the actual comment.
-#'
-#' If simplify is \code{TRUE}, a \code{data.frame} with the following columns:
-#' \code{authorDisplayName, authorProfileImageUrl, authorChannelUrl,
-#' authorChannelId.value, videoId, textDisplay,
-#' canRate, viewerRating, likeCount, publishedAt, updatedAt}
-#'
-#' @export get_most_comments
-#'
-#' @references \url{https://developers.google.com/youtube/v3/docs/commentThreads/list}
-#'
-#' @examples
-#' \dontrun{
-#'
-#' # Set API token via yt_oauth() first
-#'
-#' get_most_comments(filter = c(video_id = "N708P-A45D0"))
-#' get_most_comments(filter = c(video_id = "N708P-A45D0"), max_results = 101)
-#' }
-#'
-#'
-
-get_most_comments <- function(filter = NULL, part = "snippet,replies,id",
-                              text_format = "html", max_results = 100, page_token = NULL, ...) {
+get_parse_bind_comments <- function(filter = NULL, page_token = NULL,
+                                    part = "snippet,replies,id", text_format = "html", max_results = 100) {
   if (max_results < 20) {
     stop("max_results only takes a value over 20.
             Above 100, it outputs all the results.")
@@ -101,7 +50,7 @@ get_most_comments <- function(filter = NULL, part = "snippet,replies,id",
   }
 
   if (!(names(filter) %in%
-    c("video_id", "channel_id", "thread_id", "threads_related_to_channel"))) {
+        c("video_id", "channel_id", "thread_id", "threads_related_to_channel"))) {
     stop("filter can only take one of values: channel_id, video_id, parent_id,
         threads_related_to_channel.")
   }
@@ -129,12 +78,12 @@ get_most_comments <- function(filter = NULL, part = "snippet,replies,id",
   )
 
   querylist <- c(querylist, filter)
-  print(querylist)
+  # print(querylist)
 
   ## get first page of results of a comment thread and
   ## initialize objects with content of first page before
   ## proceeding to next pages of API response
-  res <- tuber:::tuber_GET("commentThreads", querylist, ...)
+  res <- tuber_GET("commentThreads", querylist)
   # parse results
   snippet <- parse_snippet(res)
   comment_thread <- parse_comment_thread(snippet)
@@ -162,57 +111,67 @@ get_most_comments <- function(filter = NULL, part = "snippet,replies,id",
 
   #   # shouldn't this be `unique()`?
   # next_page_token <- unique(res$nextPageToken)
-  print("erstes Mal")
-  print(next_page_token)
+  # print("erstes Mal")
+  # print(next_page_token)
 
-  print("finished")
+  # print("finished")
 
   comments
 }
 
-library(magrittr)
-tuber::yt_oauth(
-  app_id = Sys.getenv("YOUTUBE_API_APP_ID"),
-  app_secret = Sys.getenv("YOUTUBE_API_CLIENT_SECRET")
-)
-
-all_data <- get_most_comments(filter = c(video_id = "Hop_MfkXl7c"), page_token = NULL)
-counter_while <- 0
-# next_page_token <- NULL
-next_page_token <- unique(all_data$nextPageToken)
-
-while (counter_while == 0 | !is.null(next_page_token)) {
-  next_data <- get_most_comments(
-    filter = c(video_id = "Hop_MfkXl7c"),
-    page_token = next_page_token
-  )
-  next_page_token <- unique(next_data$nextPageToken)
-  print(next_data)
-  counter_while <- counter_while + 1
-  message(paste(counter_while, ":", "counter_while"))
-  message(paste(next_page_token, ":", "next_page_token"))
-  all_data <- dplyr::bind_rows(next_data, all_data)
-}
-return(all_data)
 
-retrieve_data_from_paginated_api <- function(video_id_input) {
+#' Get Most Comments
+#'
+#' Retrieves all top level comments and replies to them.
+#' Replies to replies are not included.
+#' @param video_id ID of video, required.
+#' @return
+#' Data frame with all comments and replies.
+#'
+#' @export get_most_comments
+#'
+#' @examples
+#' \dontrun{
+#'
+#' # Set API token via yt_oauth() first
+#'
+#' get_most_comments(video_id = "Hop_MfkXl7c")
+#' }
+#'
+#'
+#'
+get_most_comments <- function(video_id) {
+  video_id_arg <- video_id
+  part_arg <- "snippet,replies,id"
+  filter_arg <- c(video_id = video_id_arg)
+  text_format_arg <- "html"
 
   # initialize objects for loop
-  all_data <- get_most_comments(filter = c(video_id = video_id_input), page_token = NULL)
+  all_data <- get_parse_bind_comments(
+    filter = filter_arg,
+    page_token = NULL,
+    part = part_arg,
+    text_format = text_format_arg,
+    max_results = 100
+    )
   counter_while <- 0
-  next_page_token <- unique(all_data$nextPageToken)
+  suppressWarnings(next_page_token <- unique(all_data$nextPageToken))
 
   # loop over results until last nextPageToken
   while (counter_while == 0 | !is.null(next_page_token)) {
-    next_data <- get_most_comments(
-      filter = c(video_id = video_id_input),
-      page_token = next_page_token
+    next_data <- get_parse_bind_comments(
+      filter = filter_arg,
+      page_token = next_page_token,
+      part = part_arg,
+      text_format = text_format_arg,
+      max_results = 100
     )
+    counter_while <- counter_while + 1
+    # cli::cli_alert_success("Page {counter_while} packages.")
 
     # overwrite `next_page_token` that was initialized outside loop
     # with new content that was just retrieved in the data
-    next_page_token <- unique(next_data$nextPageToken)
-    counter_while <- counter_while + 1
+    suppressWarnings(next_page_token <- unique(next_data$nextPageToken))
 
     # overwrite `all_data` that was initialized outside loop
     # using `all_data` from outside of loop in first iteration
@@ -223,28 +182,8 @@ retrieve_data_from_paginated_api <- function(video_id_input) {
   return(all_data)
 }
 
-# new_example_with_function <- retrieve_data_from_paginated_api(video_id_input = "Hop_MfkXl7c")
-# new_example_with_function %>%
-#   dplyr::filter(totalReplyCount > 1 & is_reply)
-# length(unique(new_example_with_function$snippet_textOriginal))
-
-# example <- get_most_comments(filter = c(video_id = "Hop_MfkXl7c"), page_token = NULL)
-# example %>%
-#   tidyr::unnest(nextPageToken)
-#
-# get_most_comments(filter = c(video_id = "Hop_MfkXl7c"), page_token = "QURTSl9pMjVDT2V3WGF5Nm5ha3ZYam1HbWVPMFVybTJuWk96R2UyOTZwNnVwSXFJdTJfUFVpSVI3VUxqbU1TSGpKWVpCcFpITEl4cm83dw==")
-# tuber::get_comment_threads(filter = c(video_id = "Hop_MfkXl7c")) %>%
-#   tibble::as_tibble()
-#
-# res_2 <- get_most_comments(filter = c(video_id = "Hop_MfkXl7c"))
-#
-# res_2$nextPageToken
-#
-# t1 <- "QURTSl9pMnM3TFpxY2FQUTVDNGVfTTBDRUF0Nm52R0RXNGRuM1R3a21fMDZhemR0aUtDeHRTWnV3UXpmREs0cnI0TmYzOXh2VTlzRXdOYw=="
-# t2 <- "QURTSl9pMHdOWXBXWlRBakdlQnk1VXBUNEljSXM0QTU0WkNuOEp2VFBiX0RMQlhMeDVGdEo1UTlpWm5BaEFTRGZZZWU4UmV0LVFMTmFydw=="
-# t3 <- "QURTSl9pMlY5M1Q1VkxsZW9RbXR0VG1acXkzRjRMaDYxcEc4c05UenVUc0VIR0tXbmZmVHU2V3RVdC1WUmlHaW5wa19WUlJfS19vSGEzNA=="
-#
-# get_most_comments(filter = c(video_id = "Hop_MfkXl7c"), page_token = t1)
-# get_most_comments(filter = c(video_id = "Hop_MfkXl7c"), page_token = t2)
-# get_most_comments(filter = c(video_id = "Hop_MfkXl7c"), page_token = t3)
-#
+# tuber::yt_oauth(
+#   app_id = Sys.getenv("YOUTUBE_API_APP_ID"),
+#   app_secret = Sys.getenv("YOUTUBE_API_CLIENT_SECRET")
+# )
+# get_most_comments(video_id = "Hop_MfkXl7c")
diff --git a/R/globals.R b/R/globals.R
index 100c9e0..1af4aad 100644
--- a/R/globals.R
+++ b/R/globals.R
@@ -1,3 +1,3 @@
 # fix warning about no visible bindings due to tidyverse functions
 # https://community.rstudio.com/t/how-to-solve-no-visible-binding-for-global-variable-note/28887
-utils::globalVariables(c("totalReplyCount", "id", "comments", "kind", "etag", "items", "snippet"))
+utils::globalVariables(c("totalReplyCount", "id", "comments", "kind", "etag", "items", "snippet", "video_id_arg"))
diff --git a/man/get_most_comments.Rd b/man/get_most_comments.Rd
index 48cd150..10e964c 100644
--- a/man/get_most_comments.Rd
+++ b/man/get_most_comments.Rd
@@ -4,66 +4,26 @@
 \alias{get_most_comments}
 \title{Get Most Comments}
 \usage{
-get_most_comments(
-  filter = NULL,
-  part = "snippet,replies",
-  text_format = "html",
-  max_results = 101,
-  page_token = NULL,
-  ...
-)
+get_most_comments(video_id)
 }
 \arguments{
-\item{filter}{string; Required.
-named vector of length 1
-potential names of the entry in the vector:
-\code{video_id}: video ID.
-\code{channel_id}: channel ID.
-\code{thread_id}: comma-separated list of comment thread IDs
-\code{threads_related_to_channel}: channel ID.}
-
-\item{part}{Comment resource requested. Required. Comma separated list
-of one or more of the
-following: \code{id, snippet}. e.g., \code{"id, snippet"},
-\code{"id"}, etc. Default: \code{snippet}.}
-
-\item{text_format}{Data Type: Character. Default is \code{"html"}.
-Only takes \code{"html"} or \code{"plainText"}. Optional.}
-
-\item{max_results}{Maximum number of items that should be returned.
- Integer. Optional. Default is 100.
-If the value is greater than 100 then the function fetches all the
-results. The outcome is a simplified \code{data.frame}.}
-
-\item{page_token}{Specific page in the result set that should be
-returned. Optional.}
-
-\item{\dots}{Additional arguments passed to \code{\link{tuber_GET}}.}
+\item{video_id}{ID of video, required.}
 }
 \value{
-Nested named list. The entry \code{items} is a list of comments
-along with meta information.
-Within each of the \code{items} is an item \code{snippet} which
-has an item \code{topLevelComment$snippet$textDisplay}
-that contains the actual comment.
-
-If simplify is \code{TRUE}, a \code{data.frame} with the following columns:
-\code{authorDisplayName, authorProfileImageUrl, authorChannelUrl,
-authorChannelId.value, videoId, textDisplay,
-canRate, viewerRating, likeCount, publishedAt, updatedAt}
+Data frame with all comments and replies.
 }
 \description{
-Get Most Comments
+Retrieves all top level comments and replies to them.
+Replies to replies are not included.
 }
 \examples{
 \dontrun{
 
 # Set API token via yt_oauth() first
 
-get_most_comments(filter = c(video_id = "N708P-A45D0"))
-get_most_comments(filter = c(video_id = "N708P-A45D0"), max_results = 101)
-}
+get_most_comments(video_id = "Hop_MfkXl7c")
 }
-\references{
-\url{https://developers.google.com/youtube/v3/docs/commentThreads/list}
+
+
+
 }