Skip to content

Commit a529a0d

Browse files
committed
Weekend experiment with a custom query operator language for R/DBI.
0 parents  commit a529a0d

35 files changed

+3858
-0
lines changed

.Rbuildignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
^.*\.Rproj$
2+
^\.Rproj\.user$
3+
^docs$
4+
^README.Rmd$

.gitignore

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
.Rproj.user
2+
.Rhistory
3+
.RData
4+
.Ruserdata
5+
.DS_Store
6+
*~
7+
man/

DESCRIPTION

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
Package: rquery
2+
Type: Package
3+
Title: Relational Query Algebra for Data Manipulation
4+
Version: 0.1.0
5+
Date: 2017-12-03
6+
Authors@R: c(
7+
person("John", "Mount", email = "[email protected]", role = c("aut", "cre")),
8+
person(family = "Win-Vector LLC", role = c("cph"))
9+
)
10+
Maintainer: John Mount <[email protected]>
11+
Description: Supplies relational algebra style commands for data manipulation.
12+
License: GPL-3
13+
Encoding: UTF-8
14+
Depends:
15+
wrapr (>= 1.0.1)
16+
Imports:
17+
DBI,
18+
RSQLite,
19+
cdata (>= 0.5.0)
20+
LazyData: true
21+
RoxygenNote: 6.0.1
22+
ByteCompile: true
23+

NAMESPACE

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
# Generated by roxygen2: do not edit by hand
2+
3+
S3method(column_names,relop_dbi_table)
4+
S3method(column_names,relop_extend)
5+
S3method(column_names,relop_natural_join)
6+
S3method(column_names,relop_order_by)
7+
S3method(column_names,relop_project)
8+
S3method(column_names,relop_select_columns)
9+
S3method(column_names,relop_select_rows)
10+
S3method(format,relop_dbi_table)
11+
S3method(format,relop_extend)
12+
S3method(format,relop_natural_join)
13+
S3method(format,relop_order_by)
14+
S3method(format,relop_project)
15+
S3method(format,relop_select_columns)
16+
S3method(format,relop_select_rows)
17+
S3method(print,relop_dbi_table)
18+
S3method(print,relop_extend)
19+
S3method(print,relop_natural_join)
20+
S3method(print,relop_order_by)
21+
S3method(print,relop_project)
22+
S3method(print,relop_select_columns)
23+
S3method(print,relop_select_rows)
24+
S3method(to_sql,relop_dbi_table)
25+
S3method(to_sql,relop_extend)
26+
S3method(to_sql,relop_natural_join)
27+
S3method(to_sql,relop_order_by)
28+
S3method(to_sql,relop_project)
29+
S3method(to_sql,relop_select_columns)
30+
S3method(to_sql,relop_select_rows)
31+
export(column_names)
32+
export(dbi_copy_to)
33+
export(dbi_table)
34+
export(extend)
35+
export(natural_join)
36+
export(order_by)
37+
export(project)
38+
export(select_columns)
39+
export(select_rows)
40+
export(to_sql)
41+
importFrom(cdata,qlook)
42+
importFrom(wrapr,"%.>%")

R/dataSource.R

Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
2+
# basic data sources
3+
4+
#' DBI data source.
5+
#'
6+
#' @param db database connection
7+
#' @param table_name name of table
8+
#' @return a relop representation of the data
9+
#'
10+
#' @examples
11+
#'
12+
#' my_db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
13+
#' DBI::dbWriteTable(my_db,
14+
#' 'd',
15+
#' data.frame(AUC = 0.6, R2 = 0.2),
16+
#' overwrite = TRUE,
17+
#' temporary = TRUE)
18+
#' d <- dbi_table(my_db, 'd')
19+
#' print(d)
20+
#' sql <- to_sql(d, my_db)
21+
#' cat(sql)
22+
#'
23+
#' @export
24+
#'
25+
dbi_table <- function(db, table_name) {
26+
columns <- cdata:::listFields(db, table_name)
27+
r <- list(columns = columns,
28+
table_name = table_name)
29+
class(r) <- "relop_dbi_table"
30+
r
31+
}
32+
33+
34+
#' @export
35+
column_names.relop_dbi_table <- function (x, ...) {
36+
x$columns
37+
}
38+
39+
#' @export
40+
to_sql.relop_dbi_table <- function (x,
41+
db,
42+
indent_level = 0,
43+
tnum = cdata::makeTempNameGenerator('tsql'),
44+
append_cr = TRUE,
45+
...) {
46+
prefix <- paste(rep(' ', indent_level), collapse = '')
47+
q <- paste0(prefix,
48+
"SELECT * FROM ",
49+
DBI::dbQuoteIdentifier(db, x$table_name))
50+
if(append_cr) {
51+
q <- paste0(q, "\n")
52+
}
53+
q
54+
}
55+
56+
#' @export
57+
format.relop_dbi_table <- function(x, ...) {
58+
paste0("dbi_table('", x$table_name, "')")
59+
}
60+
61+
#' @export
62+
print.relop_dbi_table <- function(x, ...) {
63+
print(format(x),...)
64+
}
65+
66+
67+
#' Local table to DBI data source.
68+
#'
69+
#' @param db database connection.
70+
#' @param table_name name of table to create.
71+
#' @param d data.frame to copy to database.
72+
#' @param ... force later argument to be by name
73+
#' @param overwrite passed to \code{\link[DBI]{dbWriteTable}}.
74+
#' @param temporary passed to \code{\link[DBI]{dbWriteTable}}.
75+
#' @param rowidcolumn character, name to land row-ids.
76+
#' @return a relop representation of the data
77+
#'
78+
#' @examples
79+
#'
80+
#' my_db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
81+
#' d <- dbi_copy_to(my_db, 'd',
82+
#' data.frame(AUC = 0.6, R2 = 0.2))
83+
#' sql <- to_sql(d, my_db)
84+
#' cat(sql)
85+
#'
86+
#' @export
87+
#'
88+
dbi_copy_to <- function(db, table_name, d,
89+
...,
90+
overwrite = FALSE,
91+
temporary = TRUE,
92+
rowidcolumn = NULL) {
93+
if(length(list(...))>0) {
94+
stop("rquery::dbi_table unexpeced arguments")
95+
}
96+
if(!is.null(rowidcolumn)) {
97+
d[[rowidcolumn]] <- 1:nrow(d)
98+
}
99+
DBI::dbWriteTable(db,
100+
table_name,
101+
d,
102+
overwrite = overwrite,
103+
temporary = temporary)
104+
dbi_table(db, table_name)
105+
}
106+

R/extend.R

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
2+
3+
4+
5+
#' Extend data by adding more columns.
6+
#'
7+
#' partitionby and orderby can only be used with a database that supports window-functions
8+
#' (such as PostgreSQL).
9+
#'
10+
#' @param source source to select from.
11+
#' @param assignments new column assignment expressions.
12+
#' @param ... force later arguments to bind by name
13+
#' @param partitionby partitioning (window function) terms.
14+
#' @param orderby ordering (window function) terms.
15+
#' @param desc reverse order
16+
#' @return extend node.
17+
#'
18+
#' @examples
19+
#'
20+
#' my_db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
21+
#' d <- dbi_copy_to(my_db, 'd',
22+
#' data.frame(AUC = 0.6, R2 = 0.2))
23+
#' eqn <- extend(d, "v" := "AUC + R2")
24+
#' print(eqn)
25+
#' sql <- to_sql(eqn, my_db)
26+
#' cat(sql)
27+
#' DBI::dbGetQuery(my_db, sql)
28+
#'
29+
#' # SQLite can not run the following query
30+
#' eqn2 <- extend(d, "v" := "rank()",
31+
#' partitionby = "AUC", orderby = "R2")
32+
#' sql2 <- to_sql(eqn2, my_db)
33+
#' cat(sql2)
34+
#'
35+
#' @export
36+
#'
37+
extend <- function(source, assignments,
38+
...,
39+
partitionby = NULL,
40+
orderby = NULL,
41+
desc = FALSE) {
42+
if(length(assignments)<=0) {
43+
stop("rquery::extend must generate at least 1 column")
44+
}
45+
if(length(names(assignments))!=length(unique(names(assignments)))) {
46+
stop("rquery::extend generated column names must be unique")
47+
}
48+
syms <- lapply(assignments,
49+
function(ai) {
50+
find_symbols(parse(text=ai))
51+
})
52+
needs <- unique(c(unlist(syms), partitionby, orderby))
53+
have <- column_names(source)
54+
missing <- setdiff(needs, have)
55+
if(length(missing)>0) {
56+
stop(paste("rquery::extend missing columns",
57+
paste(missing, collapse = ", ")))
58+
}
59+
gint <- intersect(names(assignments), have)
60+
if(length(gint)>0) {
61+
stop(paste("rquery::extend re-used column names:",
62+
paste(gint, collapse = ", ")))
63+
}
64+
r <- list(source = list(source),
65+
partitionby = partitionby,
66+
orderby = orderby,
67+
desc = desc,
68+
columns = names(assignments),
69+
assignments = assignments)
70+
class(r) <- "relop_extend"
71+
r
72+
}
73+
74+
75+
#' @export
76+
column_names.relop_extend <- function (x, ...) {
77+
c(column_names(x$source[[1]]), x$columns)
78+
}
79+
80+
81+
#' @export
82+
format.relop_extend <- function(x, ...) {
83+
pterms <- ""
84+
if(length(x$partitionby)>0) {
85+
pterms <- paste0(";p: ",
86+
paste(x$partitionb, collapse = ", "))
87+
}
88+
oterms <- ""
89+
if(length(x$orderby)>0) {
90+
oterms <- paste0(";o: ",
91+
paste(x$orderby, collapse = ", "),
92+
ifelse(x$desc, " DESC", ""))
93+
}
94+
aterms <- paste(paste(names(x$assignments),
95+
":=",
96+
x$assignments), collapse = ", ")
97+
paste0(format(x$source[[1]]),
98+
" %.>% ",
99+
"extend(., ",
100+
aterms,
101+
pterms,
102+
oterms,
103+
")")
104+
}
105+
106+
#' @export
107+
print.relop_extend <- function(x, ...) {
108+
print(format(x),...)
109+
}
110+
111+
112+
#' @export
113+
to_sql.relop_extend <- function(x,
114+
db,
115+
indent_level = 0,
116+
tnum = cdata::makeTempNameGenerator('tsql'),
117+
append_cr = TRUE,
118+
...) {
119+
cols1 <- column_names(x$source[[1]])
120+
cols <- NULL
121+
if(length(cols1)>0) {
122+
cols <- vapply(cols1,
123+
function(ci) {
124+
DBI::dbQuoteIdentifier(db, ci)
125+
}, character(1))
126+
}
127+
prefix <- paste(rep(' ', indent_level), collapse = '')
128+
derived <- NULL
129+
if(length(x$assignments)>0) {
130+
windowTerm <- ""
131+
if((length(x$partitionby)>0) || (length(x$orderby)>0)) {
132+
windowTerm <- " OVER ( "
133+
if(length(x$partitionby)>0) {
134+
pcols <- vapply(x$partitionby,
135+
function(ci) {
136+
DBI::dbQuoteIdentifier(db, ci)
137+
}, character(1))
138+
windowTerm <- paste0(windowTerm,
139+
" PARTITION BY ",
140+
paste(pcols, collapse = ", "))
141+
}
142+
if(length(x$orderby)>0) {
143+
ocols <- vapply(x$orderby,
144+
function(ci) {
145+
DBI::dbQuoteIdentifier(db, ci)
146+
}, character(1))
147+
windowTerm <- paste0(windowTerm,
148+
" ORDER BY ",
149+
paste(ocols, collapse = ", "))
150+
if(x$desc) {
151+
windowTerm <- paste(windowTerm, "DESC")
152+
}
153+
}
154+
windowTerm <- paste(windowTerm, ")")
155+
}
156+
derived <- vapply(names(x$assignments),
157+
function(ni) {
158+
ei <- x$assignments[[ni]]
159+
paste(ei,
160+
windowTerm,
161+
"AS", DBI::dbQuoteIdentifier(db, ni))
162+
}, character(1))
163+
}
164+
subsql <- to_sql(x$source[[1]],
165+
db = db,
166+
indent_level = indent_level + 1,
167+
tnum = tnum,
168+
append_cr = FALSE)
169+
tab <- tnum()
170+
q <- paste0(prefix, "SELECT\n",
171+
prefix, " ", paste(c(cols, derived), collapse = paste0(",\n", prefix, " ")))
172+
q <- paste0(q, "\n",
173+
prefix, "FROM (\n",
174+
subsql, "\n",
175+
prefix, ") ",
176+
tab)
177+
if(append_cr) {
178+
q <- paste0(q, "\n")
179+
}
180+
q
181+
}

0 commit comments

Comments
 (0)