Skip to content

Commit 5178098

Browse files
authored
fix mcmc.ExpectedCumulativeTransactions for Pareto/NBD (Abe) (#57)
* - fix `mcmc.ExpectedCumulativeTransactions` for Pareto/NBD (Abe) with covariates (#55) - add ability to provide covariates to `abe.generateData` * fix code style
1 parent 2e82c02 commit 5178098

10 files changed

+116
-22
lines changed

DESCRIPTION

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Package: BTYDplus
22
Type: Package
33
Title: Probabilistic Models for Assessing and Predicting your Customer Base
4-
Version: 1.1.1
4+
Version: 1.1.2
55
Authors@R: person("Michael", "Platzer", email = "[email protected]", role = c("aut", "cre"))
66
Description: Provides advanced statistical methods to describe and predict customers'
77
purchase behavior in a non-contractual setting. It uses historic transaction records to fit a

NEWS

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
1.1.2
2+
- fix `mcmc.ExpectedCumulativeTransactions` for Pareto/NBD (Abe) with covariates (#55)
3+
- add ability to provide covariates to `abe.generateData`
4+
15
1.1.1
26
- added `sales.x` to `elog2cbs` output, which sums over sales in calibration, but excludes initial transaction (thanks to msinjin for the suggestion)
37

R/mcmc.R

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,8 @@ mcmc.plotPActiveDiagnostic <- function(cbs, xstar, title = "Diagnostic Plot for
207207
#' @param x Number of transactions for which probability is calculated. May also
208208
#' be a vector.
209209
#' @param sample_size Sample size for estimating the probability distribution.
210+
#' @param covariates (optional) Matrix of covariates, for Pareto/NBD (Abe)
211+
#' model, passed to \code{\link{abe.GenerateData}} for simulating data.
210212
#' @return \eqn{P(X(t)=x)}. If either \code{t} or \code{x} is a vector, then the
211213
#' output will be a vector as well. If both are vectors, the output will be a
212214
#' matrix.
@@ -218,7 +220,7 @@ mcmc.plotPActiveDiagnostic <- function(cbs, xstar, title = "Diagnostic Plot for
218220
#' mcmc = 200, burnin = 100, thin = 20, chains = 1) # short MCMC to run demo fast
219221
#' mcmc.pmf(param.draws, t = 52, x = 0:6)
220222
#' mcmc.pmf(param.draws, t = c(26, 52), x = 0:6)
221-
mcmc.pmf <- function(draws, t, x, sample_size = 10000) {
223+
mcmc.pmf <- function(draws, t, x, sample_size = 10000, covariates = NULL) {
222224
cohort_draws <- as.matrix(draws$level_2)
223225
nr_of_draws <- nrow(cohort_draws)
224226
# use posterior mean
@@ -238,7 +240,8 @@ mcmc.pmf <- function(draws, t, x, sample_size = 10000) {
238240
p["cov_log_lambda_log_mu"],
239241
p["var_log_mu"]),
240242
ncol = 2)
241-
abe.GenerateData(n = n, T.cal = 0, T.star = unique(t), params = params)$cbs
243+
abe.GenerateData(n = n, T.cal = 0, T.star = unique(t), params = params,
244+
covariates = covariates)$cbs
242245
}
243246
}))
244247
pmf <- sapply(1:length(t), function(idx) {
@@ -302,6 +305,8 @@ mcmc.Expectation <- function(draws, t, sample_size = 10000) {
302305
#' @param n.periods.final Number of time periods in the calibration and holdout
303306
#' periods.
304307
#' @param sample_size Sample size for estimating the probability distribution.
308+
#' @param covariates (optional) Matrix of covariates, for Pareto/NBD (Abe)
309+
#' model, passed to \code{\link{abe.GenerateData}} for simulating data.
305310
#' @return Numeric vector of expected cumulative total repeat transactions by
306311
#' all customers.
307312
#' @export
@@ -314,7 +319,8 @@ mcmc.Expectation <- function(draws, t, sample_size = 10000) {
314319
#' # weeks, with every eigth week being reported.
315320
#' mcmc.ExpectedCumulativeTransactions(param.draws,
316321
#' T.cal = cbs$T.cal, T.tot = 104, n.periods.final = 104/8, sample_size = 1000)
317-
mcmc.ExpectedCumulativeTransactions <- function(draws, T.cal, T.tot, n.periods.final, sample_size = 10000) {
322+
mcmc.ExpectedCumulativeTransactions <- function(draws, T.cal, T.tot, n.periods.final,
323+
sample_size = 10000, covariates = NULL) {
318324
if (any(T.cal < 0) || !is.numeric(T.cal))
319325
stop("T.cal must be numeric and may not contain negative numbers.")
320326
if (length(T.tot) > 1 || T.tot < 0 || !is.numeric(T.tot))
@@ -333,13 +339,14 @@ mcmc.ExpectedCumulativeTransactions <- function(draws, T.cal, T.tot, n.periods.f
333339
} else if (model == "abe") {
334340
p <- as.list(cohort_draws[i, ])
335341
params <- list()
336-
params$beta <- matrix(p[grepl("^log\\_", names(p))], byrow = TRUE, ncol = 2)
337-
params$gamma <- matrix(c(p["var_log_lambda"],
342+
params$beta <- matrix(as.numeric(p[grepl("^log\\_", names(p))]), byrow = TRUE, ncol = 2)
343+
params$gamma <- matrix(as.numeric(c(p["var_log_lambda"],
338344
p["cov_log_lambda_log_mu"],
339345
p["cov_log_lambda_log_mu"],
340-
p["var_log_mu"]),
346+
p["var_log_mu"])),
341347
ncol = 2)
342-
elog <- abe.GenerateData(n = n, T.cal = T.tot, T.star = 0, params = params)$elog
348+
elog <- abe.GenerateData(n = n, T.cal = T.tot, T.star = 0, params = params,
349+
covariates = covariates)$elog
343350
}
344351
setDT(elog)
345352
elog$cust <- paste0(elog$cust, "_", i)
@@ -389,6 +396,8 @@ mcmc.ExpectedCumulativeTransactions <- function(draws, T.cal, T.tot, n.periods.f
389396
#' @param ymax Upper boundary for y axis.
390397
#' @param sample_size Sample size for estimating the probability distribution.
391398
#' See \code{\link{mcmc.ExpectedCumulativeTransactions}}.
399+
#' @param covariates (optional) Matrix of covariates, for Pareto/NBD (Abe)
400+
#' model, passed to \code{\link{abe.GenerateData}} for simulating data.
392401
#' @return Matrix containing actual and expected cumulative repeat transactions.
393402
#' @export
394403
#' @seealso \code{\link{mcmc.PlotTrackingInc}}
@@ -407,10 +416,11 @@ mcmc.ExpectedCumulativeTransactions <- function(draws, T.cal, T.tot, n.periods.f
407416
mcmc.PlotTrackingCum <- function(draws, T.cal, T.tot, actual.cu.tracking.data,
408417
xlab = "Week", ylab = "Cumulative Transactions",
409418
xticklab = NULL, title = "Tracking Cumulative Transactions",
410-
ymax = NULL, sample_size = 10000) {
419+
ymax = NULL, sample_size = 10000, covariates = NULL) {
411420

412421
actual <- actual.cu.tracking.data
413-
expected <- mcmc.ExpectedCumulativeTransactions(draws, T.cal, T.tot, length(actual), sample_size = sample_size)
422+
expected <- mcmc.ExpectedCumulativeTransactions(draws, T.cal, T.tot, length(actual),
423+
sample_size = sample_size, covariates = covariates)
414424

415425
dc.PlotTracking(actual = actual, expected = expected, T.cal = T.cal,
416426
xlab = xlab, ylab = ylab, title = title,
@@ -445,6 +455,8 @@ mcmc.PlotTrackingCum <- function(draws, T.cal, T.tot, actual.cu.tracking.data,
445455
#' @param ymax Upper boundary for y axis.
446456
#' @param sample_size Sample size for estimating the probability distribution.
447457
#' See \code{\link{mcmc.ExpectedCumulativeTransactions}}.
458+
#' @param covariates (optional) Matrix of covariates, for Pareto/NBD (Abe)
459+
#' model, passed to \code{\link{abe.GenerateData}} for simulating data.
448460
#' @return Matrix containing actual and expected incremental repeat
449461
#' transactions.
450462
#' @export
@@ -464,10 +476,11 @@ mcmc.PlotTrackingCum <- function(draws, T.cal, T.tot, actual.cu.tracking.data,
464476
mcmc.PlotTrackingInc <- function(draws, T.cal, T.tot, actual.inc.tracking.data,
465477
xlab = "Week", ylab = "Transactions",
466478
xticklab = NULL, title = "Tracking Weekly Transactions",
467-
ymax = NULL, sample_size = 10000) {
479+
ymax = NULL, sample_size = 10000, covariates = NULL) {
468480

469481
actual <- actual.inc.tracking.data
470-
expected_cum <- mcmc.ExpectedCumulativeTransactions(draws, T.cal, T.tot, length(actual), sample_size = sample_size)
482+
expected_cum <- mcmc.ExpectedCumulativeTransactions(draws, T.cal, T.tot, length(actual),
483+
sample_size = sample_size, covariates = covariates)
471484
expected <- BTYD::dc.CumulativeToIncremental(expected_cum)
472485

473486
dc.PlotTracking(actual = actual, expected = expected, T.cal = T.cal,

R/pareto-nbd-abe.R

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -261,6 +261,7 @@ abe.mcmc.DrawParameters <- function(cal.cbs, covariates = c(), mcmc = 2500, burn
261261
#' @param T.star Length of holdout period. This may be a vector.
262262
#' @param params A list of model parameters: \code{beta} and \code{gamma}.
263263
#' @param date.zero Initial date for cohort start. Can be of class character, Date or POSIXt.
264+
#' @param covariates Provide matrix of customer covariates. If NULL then random covariate values between [-1,1] are drawn.
264265
#' @return List of length 2:
265266
#' \item{\code{cbs}}{A data.frame with a row for each customer and the summary statistic as columns.}
266267
#' \item{\code{elog}}{A data.frame with a row for each transaction, and columns \code{cust}, \code{date} and \code{t}.}
@@ -273,7 +274,7 @@ abe.mcmc.DrawParameters <- function(cal.cbs, covariates = c(), mcmc = 2500, burn
273274
#' data <- abe.GenerateData(n = 2000, T.cal = 32, T.star = 32, params)
274275
#' cbs <- data$cbs # customer by sufficient summary statistic - one row per customer
275276
#' elog <- data$elog # Event log - one row per event/purchase
276-
abe.GenerateData <- function(n, T.cal, T.star, params, date.zero = "2000-01-01") {
277+
abe.GenerateData <- function(n, T.cal, T.star, params, date.zero = "2000-01-01", covariates = NULL) {
277278

278279
# set start date for each customer, so that they share same T.cal date
279280
T.cal.fix <- max(T.cal)
@@ -285,9 +286,24 @@ abe.GenerateData <- function(n, T.cal, T.star, params, date.zero = "2000-01-01")
285286
params$beta <- matrix(params$beta, nrow = 1, ncol = 2)
286287

287288
nr_covars <- nrow(params$beta)
288-
covars <- matrix(c(rep(1, n), runif( (nr_covars - 1) * n, -1, 1)), nrow = n, ncol = nr_covars)
289-
colnames(covars) <- paste("covariate", 0:(nr_covars - 1), sep = "_")
290-
colnames(covars)[1] <- "intercept"
289+
if (!is.null(covariates)) {
290+
# ensure that provided covariates are in matrix format, with intercept
291+
covars <- covariates
292+
if (is.data.frame(covars)) covars <- as.matrix(covars)
293+
if (!is.matrix(covars)) covars <- matrix(covars, ncol = 1, dimnames = list(NULL, "covariate_1"))
294+
if (!all(covars[, 1] == 1)) covars <- cbind("intercept" = rep(1, nrow(covars)), covars)
295+
if (is.null(colnames(covars)) & ncol(covars) > 1)
296+
colnames(covars)[-1] <- paste("covariate", 1:(nr_covars - 1), sep = "_")
297+
if (nr_covars != ncol(covars))
298+
stop("provided number of covariate columns does not match implied covariate number by parameter `beta`")
299+
if (n != nrow(covars))
300+
covars <- covars[sample(1:nrow(covars), n, replace = TRUE), ]
301+
} else {
302+
# simulate covariates, if not provided
303+
covars <- matrix(c(rep(1, n), runif( (nr_covars - 1) * n, -1, 1)), nrow = n, ncol = nr_covars)
304+
colnames(covars) <- paste("covariate", 0:(nr_covars - 1), sep = "_")
305+
colnames(covars)[1] <- "intercept"
306+
}
291307

292308
# sample log-normal distributed parameters lambda/mu for each customer
293309
thetas <- exp( (covars %*% params$beta) + mvtnorm::rmvnorm(n, mean = c(0, 0), sigma = params$gamma))

man/abe.GenerateData.Rd

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mcmc.ExpectedCumulativeTransactions.Rd

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mcmc.PlotTrackingCum.Rd

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mcmc.PlotTrackingInc.Rd

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/mcmc.pmf.Rd

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-pareto-nbd-abe.R

Lines changed: 47 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,21 +3,50 @@ context("mcmc")
33
test_that("Pareto/NBD (Abe) MCMC", {
44
skip_on_cran()
55

6+
# test basic data simulation
7+
n <- 100
8+
params <- list()
9+
params$beta <- matrix(c(0.18, -2.5, 0.5, -0.3, -0.2, 0.8), byrow = TRUE, ncol = 2)
10+
params$gamma <- matrix(c(0.05, 0.1, 0.1, 0.2), ncol = 2)
11+
expect_silent(abe.GenerateData(n, 36, 36, params,
12+
covariates = matrix(c(rnorm(n), runif(n)), ncol = 2)))
13+
expect_silent(abe.GenerateData(n, 36, 36, params,
14+
covariates = matrix(c(rnorm(n), runif(n)), ncol = 2,
15+
dimnames = list(NULL, c("x1", "x2")))))
16+
expect_error(abe.GenerateData(n, 36, 36, params,
17+
covariates = matrix(c(rnorm(n)), ncol = 1,
18+
dimnames = list(NULL, c("x1")))),
19+
"covariate columns")
20+
expect_error(abe.GenerateData(n, 36, 36, params,
21+
covariates = matrix(c(rnorm(n), runif(n), runif(n)), ncol = 3,
22+
dimnames = list(NULL, c("x1", "x2", "x3")))),
23+
"covariate columns")
24+
params$beta <- params$beta[1:2, ]
25+
expect_silent(abe.GenerateData(n, 36, 36, params, covariates = rnorm(n)))
26+
expect_silent(abe.GenerateData(n, 36, 36, params, covariates = matrix(rnorm(n), ncol = 1,
27+
dimnames = list(NULL, "x1"))))
28+
629
# generate artificial Pareto/NBD (Abe) with 2 covariates
730
set.seed(1)
831
params <- list()
932
params$beta <- matrix(c(0.18, -2.5, 0.5, -0.3, -0.2, 0.8), byrow = TRUE, ncol = 2)
1033
params$gamma <- matrix(c(0.05, 0.1, 0.1, 0.2), ncol = 2)
11-
expect_silent(abe.GenerateData(n = 100, T.cal = 32, T.star = c(16, 32), params))
1234
n <- 5000
1335
sim <- abe.GenerateData(n,
1436
round(runif(n, 36, 96) / 12) * 12,
1537
36,
1638
params,
1739
"2010-01-01")
40+
# TODO: test param recoverability with covars being provided to abe.GenerateData
41+
# covars <- matrix(c(runif(n, 0, 10), runif(n, 10, 20)), ncol = 2,
42+
# dimnames = list(NULL, c("x1", "x2")))
43+
1844
cbs <- sim$cbs
45+
expect_true(all(c("intercept", "covariate_1", "covariate_2") %in% names(cbs)))
1946

2047
# test basic parameter estimation
48+
draws <- abe.mcmc.DrawParameters(as.data.table(cbs),
49+
mcmc = 10, burnin = 0, thin = 1, mc.cores = 1)
2150
draws <- abe.mcmc.DrawParameters(as.data.table(cbs), covariates = c("covariate_1"),
2251
mcmc = 10, burnin = 0, thin = 1, mc.cores = 1)
2352

@@ -57,4 +86,21 @@ test_that("Pareto/NBD (Abe) MCMC", {
5786
expect_true(all(cbs$x.star == round(cbs$x.star)))
5887
expect_true(all(cbs$palive >= 0 & cbs$palive <= 1))
5988

89+
# check tracking plots
90+
inc <- elog2inc(sim$elog)
91+
mat <- mcmc.PlotTrackingInc(draws,
92+
T.cal = cbs$T.cal,
93+
T.tot = max(cbs$T.cal + cbs$T.star),
94+
actual.inc.tracking.data = inc,
95+
covariates = cbs[, c("covariate_1", "covariate_2")])
96+
mat.sum <- apply(mat, 1, sum)
97+
expect_equal(mat[1], mat[2], tolerance = 0.1)
98+
cum <- elog2cum(sim$elog)
99+
mat <- mcmc.PlotTrackingCum(draws,
100+
T.cal = cbs$T.cal,
101+
T.tot = max(cbs$T.cal + cbs$T.star),
102+
actual.cu.tracking.data = cum,
103+
covariates = cbs[, c("covariate_1", "covariate_2")])
104+
expect_equal(mat[, ncol(mat) - 1], mat[, ncol(mat) - 1], tolerance = 0.1)
105+
60106
})

0 commit comments

Comments
 (0)