Add new as.data.frame method

ropensci · Feb 15, 2024 · b021277 · b021277
1 parent d568801
commit b021277
Show file tree

Hide file tree

Showing 20 changed files with 1,421 additions and 46 deletions.
diff --git a/.lintr b/.lintr
@@ -1,3 +1,8 @@
 linters: linters_with_defaults() # see vignette("lintr")
 encoding: "UTF-8"
-exclusions: list("data-raw")
+exclusions: list(
+                  "data-raw",
+                  "tests/testthat/test_ci/test-full_cff.R",
+                  "vignettes/cffr.Rmd",
+                  "vignettes/bibtex_cff.Rmd"
+                )
diff --git a/NAMESPACE b/NAMESPACE
@@ -1,5 +1,6 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(as.data.frame,cff)
 S3method(c,cff)
 S3method(print,cff)
 export(as.cff)

diff --git a/NEWS.md b/NEWS.md
@@ -5,6 +5,8 @@
 -   The conversion from `cff` to `bibentry` is performed now by a new function
     `cff_to_bibentry()`. Previous names of this function were `cff_to_bibtex()`
     and `cff_extract_to_bibtex()` that are now superseded.
+-   New methods:
+    -   `as.data.frame.cff()`
 
 ## Changes on bibtex crosswalk
 

diff --git a/R/assertions.R b/R/assertions.R
@@ -6,10 +6,11 @@ is_email <- function(email) {
     return(FALSE)
   }
 
-  # See https://www.nicebread.de/validating-email-adresses-in-r/
-  x <- grepl("\\<[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,}\\>",
-    as.character(email),
-    ignore.case = TRUE
+  email <- trimws(as.character(email))
+
+  # See CFF  validation schema
+  x <- grepl("^[\\S]+@[\\S]+\\.[\\S]{2,}$",email ,
+             ignore.case = TRUE, perl = TRUE
   )
   x
 }
@@ -22,7 +23,7 @@ is_url <- function(url) {
     return(FALSE)
   }
 
-  x <- grepl("^http://|^https://|^ftp://|sftp://", url)
+  x <- grepl("^(https|http|ftp|sftp)://.+", url)
   x
 }
 

diff --git a/R/cff-methods.R b/R/cff-methods.R
@@ -0,0 +1,46 @@
+#' Print Values
+#'
+#' @noRd
+#' @export
+print.cff <- function(x, ...) {
+  cat(yaml::as.yaml(x))
+}
+
+#' Combine Values into a Vector or List
+#'
+#' @source
+#' Based on `?c.person` \CRANpkg{utils}.
+#'
+#' <https://github.com/wch/r-source/blob/trunk/src/library/utils/R/citation.R>
+#'
+#' @noRd
+#' @export
+c.cff <-
+  function(..., recursive = FALSE) {
+    args <- list(...)
+    args <- lapply(args, unclass)
+    rval <- do.call("c", args)
+    class(rval) <- "cff"
+    rval
+  }
+
+
+#' Coerce to a Data Frame
+#'
+#' @noRd
+#' @export
+as.data.frame.cff <- function(x, row.names = NULL, optional = FALSE, ...) {
+  # If the cff is unnamed is a list of persons/references
+  if (is.null(names(x))) {
+    the_df <- cff_list_to_df(x)
+  } else {
+    the_df <- cff_to_df(x)
+  }
+
+  the_df <- as.data.frame(the_df,
+    row.names = row.names, optional = optional,
+    ...
+  )
+
+  return(the_df)
+}
diff --git a/R/cff_read.R b/R/cff_read.R
@@ -159,28 +159,6 @@ as.cff <- function(x) {
   x
 }
 
-
-# Print method
-
-#' @export
-print.cff <- function(x, ...) {
-  cat(yaml::as.yaml(x))
-}
-
-# c method
-# Based on c.person (utils package)
-# https://github.com/wch/r-source/blob/trunk/src/library/utils/R/citation.R
-
-#' @export
-c.cff <-
-  function(..., recursive = FALSE) {
-    args <- list(...)
-    args <- lapply(args, unclass)
-    rval <- do.call("c", args)
-    class(rval) <- "cff"
-    rval
-  }
-
 # Helper----
 
 #' Recursively clean lists and assign cff classes

diff --git a/R/cff_to_bibentry.R b/R/cff_to_bibentry.R
@@ -127,10 +127,6 @@ cff_to_bibentry <- function(x,
   return(pref)
 }
 
-#' @export
-#' @rdname cff_to_bibentry
-#' @usage NULL
-cff_to_bibtex <- cff_to_bibentry
 
 cff_bibtex_parser <- function(x) {
   if (is.null(x)) {

diff --git a/R/deprecated.R b/R/deprecated.R
@@ -1,10 +1,9 @@
 #' Previous API: Create BibTeX entries from several sources
 #'
 #' @description
-#' `r lifecycle::badge('superseded')`
-#' Please use [cff_to_bibentry()] instead.
+#' `r lifecycle::badge('superseded')` Please use [cff_to_bibentry()] instead.
 #'
-#' @rdname previous_cff_to_bib
+#' @rdname renamed_cff_to_bib
 #' @inheritParams cff_to_bibentry
 #' @export
 #' @keywords internal
@@ -25,7 +24,21 @@ cff_extract_to_bibtex <- function(x,
   if (requireNamespace("lifecycle", quietly = TRUE)) {
     lifecycle::deprecate_soft(
       "0.5.0", "cff_extract_to_bibtex()",
-      "cff_to_bibentry()"
+      details = "Function renamed, use `cff_to_bibentry()` instead."
+    )
+  }
+  cff_to_bibentry(x, what)
+}
+
+#' @rdname renamed_cff_to_bib
+#' @export
+#' @keywords internal
+cff_to_bibtex  <- function(x,
+                           what = c("preferred", "references", "all")) {
+  if (requireNamespace("lifecycle", quietly = TRUE)) {
+    lifecycle::deprecate_soft(
+      "0.5.0", "cff_extract_to_bibtex()",
+      details = "Function renamed, use `cff_to_bibentry()` instead."
     )
   }
   cff_to_bibentry(x, what)

diff --git a/R/cff-class.R → R/docs.R b/R/cff-class.R → R/docs.R
@@ -2,11 +2,11 @@
 #'
 #' @name cff-class
 #'
-#' @description
-#' TODO
-#'
 #' @keywords internal
 #'
+#' @description
 #'
-#'
+#' ```{r child = "man/chunks/cffclass.Rmd"}
+#' ```
 NULL
+
diff --git a/R/utils-methods.R b/R/utils-methods.R
@@ -0,0 +1,142 @@
+# Utils for df ----
+unnamed_to_df <- function(key, nm) {
+  key_l <- as.integer(lengths(key))
+  m <- matrix(unlist(key), nrow = 1)
+  df <- as.data.frame(m)
+  names(df) <- paste0(nm, ".", sprintf("%02d", seq_len(key_l) - 1))
+  return(df)
+}
+
+named_to_df <- function(key, nm) {
+  key_un <- unlist(key)
+
+
+  m <- matrix(as.character(key_un), nrow = 1)
+  df <- as.data.frame(m)
+  names(df) <- names(key_un)
+  return(df)
+}
+
+nested_named_to_df <- function(key, nm) {
+  key_unlist <- key[[1]]
+  key_len <- seq_len(length(key_unlist))
+
+  df_l_type3 <- lapply(key_len, function(z) {
+    df <- cff_to_df(key_unlist[[z]])
+
+    # Prepend names
+    names(df) <- paste0(nm, ".", sprintf("%02d", z - 1), ".", names(df))
+    return(df)
+  })
+
+  df_list_to_df(df_l_type3)
+}
+
+prefcit_to_df <- function(key, nm = "preferred_citation.") {
+  key_df <- cff_to_df(key[[1]])
+  names(key_df) <- paste0(nm, names(key_df))
+  return(key_df)
+}
+
+reflist_to_df <- function(key, nm) {
+  key_unlist <- key[[1]]
+  key_len <- seq_len(length(key_unlist))
+
+  prefix_key <- paste0(nm, ".", sprintf("%02d", key_len - 1), ".")
+
+  df_l <- lapply(key_len, function(y) {
+    key_l <- key_unlist[y]
+    nm_pref <- prefix_key[y]
+
+    dff <- prefcit_to_df(key_l, nm_pref)
+
+    dff
+  })
+
+  final_df <- df_list_to_df(df_l)
+
+  final_df
+}
+
+df_list_to_df <- function(x) {
+  # Clean NULL
+  df_l_clean <- x[!vapply(x, is.null, logical(1))]
+
+  final_df <- do.call(cbind, df_l_clean)
+  return(final_df)
+}
+cff_to_df <- function(x) {
+  # CFF has different models
+  # type 1: unnamed arrays
+  unnamed_array <- c("keywords", "languages", "patent-states")
+
+
+  # type 2: named arrays
+  named_array <- c(
+    "conference", "database-provider", "institution",
+    "location", "publisher"
+  )
+
+
+  # type 3: nested named arrays
+  nested_named_array <- c(
+    "authors", "contact", "editors", "editors-series",
+    "recipients", "senders", "translators", "identifiers"
+  )
+
+  nms <- names(x)
+  x_len <- seq_len(length(x))
+
+
+  df_l <- lapply(x_len, function(y) {
+    nm <- nms[y]
+
+    if (nm %in% unnamed_array) {
+      return(unnamed_to_df(x[y], nm))
+    }
+    if (nm %in% named_array) {
+      return(named_to_df(x[y], nm))
+    }
+    if (nm %in% nested_named_array) {
+      return(nested_named_to_df(x[y], nm))
+    }
+    if (nm == "preferred-citation") {
+      return(prefcit_to_df(x[y]))
+    }
+
+    if (nm == "references") {
+      return(reflist_to_df(x[y], nm))
+    }
+
+    the_df <- as.data.frame(x[[y]])
+    names(the_df) <- gsub("-", "_", nm)
+    return(the_df)
+  })
+
+  final_df <- df_list_to_df(df_l)
+
+  return(final_df)
+}
+
+cff_list_to_df <- function(x) {
+  # Applicable to lists of persons or references
+  # Guess type
+  if (!"type" %in% names(x[[1]])) {
+    guess <- "person"
+  } else {
+    guess <- "reference"
+  }
+
+
+  x_len <- seq_len(length(x))
+  df_l <- lapply(x_len, function(y) {
+    df <- as.data.frame(x[y])
+    newnames <- paste0(guess, ".", sprintf("%02d", y - 1), ".", names(df))
+    names(df) <- newnames
+
+    df
+  })
+
+  df_end <- df_list_to_df(df_l)
+  df_end
+}