Skip to content

Commit

Permalink
Add new as.data.frame method
Browse files Browse the repository at this point in the history
  • Loading branch information
dieghernan committed Feb 15, 2024
1 parent d568801 commit b021277
Show file tree
Hide file tree
Showing 20 changed files with 1,421 additions and 46 deletions.
7 changes: 6 additions & 1 deletion .lintr
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
linters: linters_with_defaults() # see vignette("lintr")
encoding: "UTF-8"
exclusions: list("data-raw")
exclusions: list(
"data-raw",
"tests/testthat/test_ci/test-full_cff.R",
"vignettes/cffr.Rmd",
"vignettes/bibtex_cff.Rmd"
)
1 change: 1 addition & 0 deletions NAMESPACE
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Generated by roxygen2: do not edit by hand

S3method(as.data.frame,cff)
S3method(c,cff)
S3method(print,cff)
export(as.cff)
Expand Down
2 changes: 2 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@
- The conversion from `cff` to `bibentry` is performed now by a new function
`cff_to_bibentry()`. Previous names of this function were `cff_to_bibtex()`
and `cff_extract_to_bibtex()` that are now superseded.
- New methods:
- `as.data.frame.cff()`

## Changes on bibtex crosswalk

Expand Down
11 changes: 6 additions & 5 deletions R/assertions.R
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,11 @@ is_email <- function(email) {
return(FALSE)
}

# See https://www.nicebread.de/validating-email-adresses-in-r/
x <- grepl("\\<[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,}\\>",
as.character(email),
ignore.case = TRUE
email <- trimws(as.character(email))

# See CFF validation schema
x <- grepl("^[\\S]+@[\\S]+\\.[\\S]{2,}$",email ,

Check warning on line 12 in R/assertions.R

View workflow job for this annotation

GitHub Actions / Run lintr scanning

file=R/assertions.R,line=12,col=44,[commas_linter] Commas should always have a space after.

Check warning on line 12 in R/assertions.R

View workflow job for this annotation

GitHub Actions / Run lintr scanning

file=R/assertions.R,line=12,col=49,[commas_linter] Commas should never have a space before.

Check notice

Code scanning / lintr

Commas should always have a space after. Note

Commas should always have a space after.

Check notice

Code scanning / lintr

Commas should never have a space before. Note

Commas should never have a space before.
ignore.case = TRUE, perl = TRUE

Check warning on line 13 in R/assertions.R

View workflow job for this annotation

GitHub Actions / Run lintr scanning

file=R/assertions.R,line=13,col=13,[indentation_linter] Indentation should be 4 spaces but is 13 spaces.

Check notice

Code scanning / lintr

Indentation should be 4 spaces but is 13 spaces. Note

Indentation should be 4 spaces but is 13 spaces.
)
x
}
Expand All @@ -22,7 +23,7 @@ is_url <- function(url) {
return(FALSE)
}

x <- grepl("^http://|^https://|^ftp://|sftp://", url)
x <- grepl("^(https|http|ftp|sftp)://.+", url)
x
}

Expand Down
46 changes: 46 additions & 0 deletions R/cff-methods.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
#' Print Values
#'
#' @noRd
#' @export
print.cff <- function(x, ...) {
cat(yaml::as.yaml(x))
}

#' Combine Values into a Vector or List
#'
#' @source
#' Based on `?c.person` \CRANpkg{utils}.
#'
#' <https://github.com/wch/r-source/blob/trunk/src/library/utils/R/citation.R>
#'
#' @noRd
#' @export
c.cff <-
function(..., recursive = FALSE) {
args <- list(...)
args <- lapply(args, unclass)
rval <- do.call("c", args)
class(rval) <- "cff"
rval
}


#' Coerce to a Data Frame
#'
#' @noRd
#' @export
as.data.frame.cff <- function(x, row.names = NULL, optional = FALSE, ...) {

Check warning on line 32 in R/cff-methods.R

View workflow job for this annotation

GitHub Actions / Run lintr scanning

file=R/cff-methods.R,line=32,col=34,[object_name_linter] Variable and function name style should match snake_case or symbols.

Check notice

Code scanning / lintr

Variable and function name style should match snake_case or symbols. Note

Variable and function name style should match snake_case or symbols.
# If the cff is unnamed is a list of persons/references
if (is.null(names(x))) {
the_df <- cff_list_to_df(x)
} else {
the_df <- cff_to_df(x)
}

the_df <- as.data.frame(the_df,
row.names = row.names, optional = optional,
...
)

return(the_df)
}
22 changes: 0 additions & 22 deletions R/cff_read.R
Original file line number Diff line number Diff line change
Expand Up @@ -159,28 +159,6 @@ as.cff <- function(x) {
x
}


# Print method

#' @export
print.cff <- function(x, ...) {
cat(yaml::as.yaml(x))
}

# c method
# Based on c.person (utils package)
# https://github.com/wch/r-source/blob/trunk/src/library/utils/R/citation.R

#' @export
c.cff <-
function(..., recursive = FALSE) {
args <- list(...)
args <- lapply(args, unclass)
rval <- do.call("c", args)
class(rval) <- "cff"
rval
}

# Helper----

#' Recursively clean lists and assign cff classes
Expand Down
4 changes: 0 additions & 4 deletions R/cff_to_bibentry.R
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,6 @@ cff_to_bibentry <- function(x,
return(pref)
}

#' @export
#' @rdname cff_to_bibentry
#' @usage NULL
cff_to_bibtex <- cff_to_bibentry

cff_bibtex_parser <- function(x) {

Check warning on line 131 in R/cff_to_bibentry.R

View workflow job for this annotation

GitHub Actions / Run lintr scanning

file=R/cff_to_bibentry.R,line=131,col=1,[cyclocomp_linter] Functions should have cyclomatic complexity of less than 15, this has 51.
if (is.null(x)) {
Expand Down
21 changes: 17 additions & 4 deletions R/deprecated.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
#' Previous API: Create BibTeX entries from several sources
#'
#' @description
#' `r lifecycle::badge('superseded')`
#' Please use [cff_to_bibentry()] instead.
#' `r lifecycle::badge('superseded')` Please use [cff_to_bibentry()] instead.
#'
#' @rdname previous_cff_to_bib
#' @rdname renamed_cff_to_bib
#' @inheritParams cff_to_bibentry
#' @export
#' @keywords internal
Expand All @@ -25,7 +24,21 @@ cff_extract_to_bibtex <- function(x,
if (requireNamespace("lifecycle", quietly = TRUE)) {
lifecycle::deprecate_soft(
"0.5.0", "cff_extract_to_bibtex()",
"cff_to_bibentry()"
details = "Function renamed, use `cff_to_bibentry()` instead."
)
}
cff_to_bibentry(x, what)
}

#' @rdname renamed_cff_to_bib
#' @export
#' @keywords internal
cff_to_bibtex <- function(x,
what = c("preferred", "references", "all")) {
if (requireNamespace("lifecycle", quietly = TRUE)) {
lifecycle::deprecate_soft(
"0.5.0", "cff_extract_to_bibtex()",
details = "Function renamed, use `cff_to_bibentry()` instead."
)
}
cff_to_bibentry(x, what)
Expand Down
8 changes: 4 additions & 4 deletions R/cff-class.R → R/docs.R
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
#'
#' @name cff-class
#'
#' @description
#' TODO
#'
#' @keywords internal
#'
#' @description
#'
#'
#' ```{r child = "man/chunks/cffclass.Rmd"}
#' ```
NULL

Check notice

Code scanning / lintr

Trailing blank lines are superfluous. Note

Trailing blank lines are superfluous.
142 changes: 142 additions & 0 deletions R/utils-methods.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
# Utils for df ----
unnamed_to_df <- function(key, nm) {
key_l <- as.integer(lengths(key))
m <- matrix(unlist(key), nrow = 1)
df <- as.data.frame(m)
names(df) <- paste0(nm, ".", sprintf("%02d", seq_len(key_l) - 1))
return(df)
}

named_to_df <- function(key, nm) {
key_un <- unlist(key)


m <- matrix(as.character(key_un), nrow = 1)
df <- as.data.frame(m)
names(df) <- names(key_un)
return(df)
}

nested_named_to_df <- function(key, nm) {
key_unlist <- key[[1]]
key_len <- seq_len(length(key_unlist))

df_l_type3 <- lapply(key_len, function(z) {
df <- cff_to_df(key_unlist[[z]])

# Prepend names
names(df) <- paste0(nm, ".", sprintf("%02d", z - 1), ".", names(df))
return(df)
})

df_list_to_df(df_l_type3)
}

prefcit_to_df <- function(key, nm = "preferred_citation.") {
key_df <- cff_to_df(key[[1]])
names(key_df) <- paste0(nm, names(key_df))
return(key_df)
}

reflist_to_df <- function(key, nm) {
key_unlist <- key[[1]]
key_len <- seq_len(length(key_unlist))

prefix_key <- paste0(nm, ".", sprintf("%02d", key_len - 1), ".")

df_l <- lapply(key_len, function(y) {
key_l <- key_unlist[y]
nm_pref <- prefix_key[y]

dff <- prefcit_to_df(key_l, nm_pref)

dff
})

final_df <- df_list_to_df(df_l)

final_df
}

df_list_to_df <- function(x) {
# Clean NULL
df_l_clean <- x[!vapply(x, is.null, logical(1))]

final_df <- do.call(cbind, df_l_clean)
return(final_df)
}
cff_to_df <- function(x) {
# CFF has different models
# type 1: unnamed arrays
unnamed_array <- c("keywords", "languages", "patent-states")


# type 2: named arrays
named_array <- c(
"conference", "database-provider", "institution",
"location", "publisher"
)


# type 3: nested named arrays
nested_named_array <- c(
"authors", "contact", "editors", "editors-series",
"recipients", "senders", "translators", "identifiers"
)

nms <- names(x)
x_len <- seq_len(length(x))


df_l <- lapply(x_len, function(y) {
nm <- nms[y]

if (nm %in% unnamed_array) {
return(unnamed_to_df(x[y], nm))
}
if (nm %in% named_array) {
return(named_to_df(x[y], nm))
}
if (nm %in% nested_named_array) {
return(nested_named_to_df(x[y], nm))
}
if (nm == "preferred-citation") {
return(prefcit_to_df(x[y]))
}

if (nm == "references") {
return(reflist_to_df(x[y], nm))
}

the_df <- as.data.frame(x[[y]])
names(the_df) <- gsub("-", "_", nm)
return(the_df)
})

final_df <- df_list_to_df(df_l)

return(final_df)
}

cff_list_to_df <- function(x) {
# Applicable to lists of persons or references
# Guess type
if (!"type" %in% names(x[[1]])) {
guess <- "person"
} else {
guess <- "reference"

Check warning on line 127 in R/utils-methods.R

View check run for this annotation

Codecov / codecov/patch

R/utils-methods.R#L127

Added line #L127 was not covered by tests
}


x_len <- seq_len(length(x))
df_l <- lapply(x_len, function(y) {
df <- as.data.frame(x[y])
newnames <- paste0(guess, ".", sprintf("%02d", y - 1), ".", names(df))
names(df) <- newnames

df
})

df_end <- df_list_to_df(df_l)
df_end
}
Loading

0 comments on commit b021277

Please sign in to comment.