diff --git a/stan/math/prim/prob/yule_simon_lccdf.hpp b/stan/math/prim/prob/yule_simon_lccdf.hpp new file mode 100644 index 00000000000..bee197415b9 --- /dev/null +++ b/stan/math/prim/prob/yule_simon_lccdf.hpp @@ -0,0 +1,84 @@ +#ifndef STAN_MATH_PRIM_PROB_YULE_SIMON_LCCDF_HPP +#define STAN_MATH_PRIM_PROB_YULE_SIMON_LCCDF_HPP + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace stan { +namespace math { + +/** \ingroup prob_dists + * Returns the log CCDF of the Yule-Simon distribution with shape parameter. + * Given containers of matching sizes, returns the log sum of probabilities. + * + * @tparam T_n type of outcome parameter + * @tparam T_alpha type of shape parameter + * + * @param n outcome variable + * @param alpha shape parameter + * @return log probability or log sum of probabilities + * @throw std::domain_error if alpha fails to be positive + * @throw std::invalid_argument if container sizes mismatch + */ +template +inline return_type_t yule_simon_lccdf(const T_n& n, + const T_alpha& alpha) { + using T_partials_return = partials_return_t; + using T_n_ref = ref_type_t; + using T_alpha_ref = ref_type_t; + static constexpr const char* function = "yule_simon_lccdf"; + + check_consistent_sizes(function, "Outcome variable", n, "Shape parameter", + alpha); + if (size_zero(n, alpha)) { + return 0.0; + } + + T_n_ref n_ref = n; + T_alpha_ref alpha_ref = alpha; + check_positive_finite(function, "Shape parameter", alpha_ref); + + scalar_seq_view n_vec(n); + scalar_seq_view alpha_vec(alpha_ref); + const size_t max_size_seq_view = max_size(n_ref, alpha_ref); + + // Explicit return for invalid or extreme values + // The gradients are technically ill-defined, but treated as zero + for (int i = 0; i < stan::math::size(n); i++) { + if (n_vec.val(i) < 1.0) { + return 0.0; + } + if (n_vec.val(i) == std::numeric_limits::max()) { + return negative_infinity(); + } + } + + T_partials_return log_ccdf(0.0); + auto ops_partials = make_partials_propagator(alpha_ref); + for (size_t i = 0; i < max_size_seq_view; i++) { + auto np1 = n_vec.val(i) + 1.0; + auto ap1 = alpha_vec.val(i) + 1.0; + auto nap1 = n_vec.val(i) + ap1; + log_ccdf += lgamma(ap1) + lgamma(np1) - lgamma(nap1); + + if constexpr (!is_constant::value) { + partials<0>(ops_partials)[i] += digamma(ap1) - digamma(nap1); + } + } + + return ops_partials.build(log_ccdf); +} + +} // namespace math +} // namespace stan +#endif diff --git a/test/prob/yule_simon/yule_simon_ccdf_log_test.hpp b/test/prob/yule_simon/yule_simon_ccdf_log_test.hpp new file mode 100644 index 00000000000..d5dbe5521b3 --- /dev/null +++ b/test/prob/yule_simon/yule_simon_ccdf_log_test.hpp @@ -0,0 +1,65 @@ +// Arguments: Ints, Doubles +#include +#include + +using std::numeric_limits; +using std::vector; + +class AgradCcdfLogYuleSimon : public AgradCcdfLogTest { + public: + void valid_values(vector>& parameters, + vector& cdf_log) { + vector param(2); + + param[0] = 5; // n + param[1] = 20.0; // alpha + parameters.push_back(param); + cdf_log.push_back(std::log(1.0 - 0.9999811782420478)); // expected ccdf_log + + param[0] = 10; // n + param[1] = 5.5; // alpha + parameters.push_back(param); + cdf_log.push_back(std::log(1.0 - 0.9997987132162779)); // expected ccdf_log + } + + void invalid_values(vector& index, vector& value) { + // n + + // alpha + index.push_back(1U); + value.push_back(0.0); + + index.push_back(1U); + value.push_back(-1.0); + + index.push_back(1U); + value.push_back(std::numeric_limits::infinity()); + } + + // BOUND INCLUDED IN ORDER FOR TEST TO PASS WITH CURRENT FRAMEWORK + bool has_lower_bound() { return false; } + + bool has_upper_bound() { return false; } + + template + stan::return_type_t ccdf_log(const T_n& n, const T_alpha& alpha, + const T2&, const T3&, const T4&, + const T5&) { + return stan::math::yule_simon_lccdf(n, alpha); + } + + template + stan::return_type_t ccdf_log_function(const T_n& n, + const T_alpha& alpha, + const T2&, const T3&, + const T4&, const T5&) { + using stan::math::lgamma; + + auto log_ccdf + = lgamma(alpha + 1.0) + lgamma(n + 1.0) - lgamma(n + alpha + 1.0); + + return log_ccdf; + } +};