Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for discounted properties in MDPs and DTMCs #621

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions resources/examples/testfiles/mdp/small_discount.nm
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
mdp

module discounting

s : [0..3] init 0;
[a] s=0 -> (s'=1);
[b] s=0 -> (s'=2);
[] s=1 -> (s'=3);
[] s=2 -> 0.8 : (s'=2) + 0.2 : (s'=3);
[] s=3 -> true;

endmodule

rewards
[] s=1 : 4;
[] s=2 : 1;
endrewards
35 changes: 34 additions & 1 deletion src/storm-parsers/parser/FormulaParserGrammar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ void FormulaParserGrammar::initialize() {
operatorSubFormula =
(((qi::eps(qi::_r1 == storm::logic::FormulaContext::Probability) > formula(FormulaKind::Path, qi::_r1)) |
(qi::eps(qi::_r1 == storm::logic::FormulaContext::Reward) >
(longRunAverageRewardFormula | eventuallyFormula(qi::_r1) | cumulativeRewardFormula | instantaneousRewardFormula | totalRewardFormula)) |
(longRunAverageRewardFormula | eventuallyFormula(qi::_r1) | discountedCumulativeRewardFormula | discountedTotalRewardFormula |
cumulativeRewardFormula | instantaneousRewardFormula | totalRewardFormula)) |
(qi::eps(qi::_r1 == storm::logic::FormulaContext::Time) > eventuallyFormula(qi::_r1)) |
(qi::eps(qi::_r1 == storm::logic::FormulaContext::LongRunAverage) > formula(FormulaKind::State, storm::logic::FormulaContext::LongRunAverage))) >>
-(qi::lit("||") >
Expand Down Expand Up @@ -174,6 +175,14 @@ void FormulaParserGrammar::initialize() {
pathFormula.name("path formula");

// Quantitative path formulae (reward)
discountedCumulativeRewardFormula =
(qi::lit("Cdiscount=") >> expressionParser >>
timeBounds)[qi::_val = phoenix::bind(&FormulaParserGrammar::createDiscountedCumulativeRewardFormula, phoenix::ref(*this), qi::_1, qi::_2)];
discountedCumulativeRewardFormula.name("discounted cumulative reward formula");
discountedTotalRewardFormula =
(qi::lit("Cdiscount=") >>
expressionParser)[qi::_val = phoenix::bind(&FormulaParserGrammar::createDiscountedTotalRewardFormula, phoenix::ref(*this), qi::_1)];
discountedTotalRewardFormula.name("discounted total reward formula");
longRunAverageRewardFormula = (qi::lit("LRA") | qi::lit("S") |
qi::lit("MP"))[qi::_val = phoenix::bind(&FormulaParserGrammar::createLongRunAverageRewardFormula, phoenix::ref(*this))];
longRunAverageRewardFormula.name("long run average reward formula");
Expand Down Expand Up @@ -278,6 +287,8 @@ void FormulaParserGrammar::initialize() {
// debug(filterProperty)
// debug(constantDefinition )
// debug(start)
// debug(discountedCumulativeRewardFormula)
// debug(discountedTotalRewardFormula)

// Enable error reporting.
qi::on_error<qi::fail>(rewardModelName, handler(qi::_1, qi::_2, qi::_3, qi::_4));
Expand Down Expand Up @@ -305,6 +316,8 @@ void FormulaParserGrammar::initialize() {
qi::on_error<qi::fail>(instantaneousRewardFormula, handler(qi::_1, qi::_2, qi::_3, qi::_4));
qi::on_error<qi::fail>(cumulativeRewardFormula, handler(qi::_1, qi::_2, qi::_3, qi::_4));
qi::on_error<qi::fail>(totalRewardFormula, handler(qi::_1, qi::_2, qi::_3, qi::_4));
qi::on_error<qi::fail>(discountedCumulativeRewardFormula, handler(qi::_1, qi::_2, qi::_3, qi::_4));
qi::on_error<qi::fail>(discountedTotalRewardFormula, handler(qi::_1, qi::_2, qi::_3, qi::_4));
qi::on_error<qi::fail>(playerCoalition, handler(qi::_1, qi::_2, qi::_3, qi::_4));
qi::on_error<qi::fail>(gameFormula, handler(qi::_1, qi::_2, qi::_3, qi::_4));
qi::on_error<qi::fail>(multiOperatorFormula, handler(qi::_1, qi::_2, qi::_3, qi::_4));
Expand Down Expand Up @@ -402,10 +415,30 @@ std::shared_ptr<storm::logic::Formula const> FormulaParserGrammar::createCumulat
return std::shared_ptr<storm::logic::Formula const>(new storm::logic::CumulativeRewardFormula(bounds, timeBoundReferences));
}

std::shared_ptr<storm::logic::Formula const> FormulaParserGrammar::createDiscountedCumulativeRewardFormula(
storm::expressions::Expression const& discountFactor,
std::vector<std::tuple<boost::optional<storm::logic::TimeBound>, boost::optional<storm::logic::TimeBound>,
std::shared_ptr<storm::logic::TimeBoundReference>>> const& timeBounds) const {
std::vector<storm::logic::TimeBound> bounds;
std::vector<storm::logic::TimeBoundReference> timeBoundReferences;
for (auto const& timeBound : timeBounds) {
STORM_LOG_THROW(!std::get<0>(timeBound), storm::exceptions::WrongFormatException, "Cumulative reward formulas with lower time bound are not allowed.");
STORM_LOG_THROW(std::get<1>(timeBound), storm::exceptions::WrongFormatException, "Cumulative reward formulas require an upper bound.");
bounds.push_back(std::get<1>(timeBound).get());
timeBoundReferences.emplace_back(*std::get<2>(timeBound));
}
return std::shared_ptr<storm::logic::Formula const>(new storm::logic::DiscountedCumulativeRewardFormula(discountFactor, bounds, timeBoundReferences));
}

std::shared_ptr<storm::logic::Formula const> FormulaParserGrammar::createTotalRewardFormula() const {
return std::shared_ptr<storm::logic::Formula const>(new storm::logic::TotalRewardFormula());
}

std::shared_ptr<storm::logic::Formula const> FormulaParserGrammar::createDiscountedTotalRewardFormula(
storm::expressions::Expression const& discountFactor) const {
return std::shared_ptr<storm::logic::Formula const>(new storm::logic::DiscountedTotalRewardFormula(discountFactor));
}

std::shared_ptr<storm::logic::Formula const> FormulaParserGrammar::createLongRunAverageRewardFormula() const {
return std::shared_ptr<storm::logic::Formula const>(new storm::logic::LongRunAverageRewardFormula());
}
Expand Down
7 changes: 7 additions & 0 deletions src/storm-parsers/parser/FormulaParserGrammar.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ class FormulaParserGrammar : public qi::grammar<Iterator, std::vector<storm::jan
qi::rule<Iterator, std::shared_ptr<storm::logic::Formula const>(), Skipper> instantaneousRewardFormula;
qi::rule<Iterator, std::shared_ptr<storm::logic::Formula const>(), Skipper> cumulativeRewardFormula;
qi::rule<Iterator, std::shared_ptr<storm::logic::Formula const>(), Skipper> totalRewardFormula;
qi::rule<Iterator, std::shared_ptr<storm::logic::Formula const>(), Skipper> discountedCumulativeRewardFormula;
qi::rule<Iterator, std::shared_ptr<storm::logic::Formula const>(), Skipper> discountedTotalRewardFormula;

// Game Formulae
qi::rule<Iterator, storm::logic::PlayerCoalition(), qi::locals<std::vector<std::variant<std::string, storm::storage::PlayerIndex>>>, Skipper>
Expand Down Expand Up @@ -218,7 +220,12 @@ class FormulaParserGrammar : public qi::grammar<Iterator, std::vector<storm::jan
std::shared_ptr<storm::logic::Formula const> createCumulativeRewardFormula(
std::vector<std::tuple<boost::optional<storm::logic::TimeBound>, boost::optional<storm::logic::TimeBound>,
std::shared_ptr<storm::logic::TimeBoundReference>>> const& timeBounds) const;
std::shared_ptr<storm::logic::Formula const> createDiscountedCumulativeRewardFormula(
storm::expressions::Expression const& discountFactor,
std::vector<std::tuple<boost::optional<storm::logic::TimeBound>, boost::optional<storm::logic::TimeBound>,
std::shared_ptr<storm::logic::TimeBoundReference>>> const& timeBounds) const;
std::shared_ptr<storm::logic::Formula const> createTotalRewardFormula() const;
std::shared_ptr<storm::logic::Formula const> createDiscountedTotalRewardFormula(storm::expressions::Expression const& discountFactor) const;
std::shared_ptr<storm::logic::Formula const> createLongRunAverageRewardFormula() const;
std::shared_ptr<storm::logic::Formula const> createAtomicExpressionFormula(storm::expressions::Expression const& expression) const;
std::shared_ptr<storm::logic::Formula const> createBooleanLiteralFormula(bool literal) const;
Expand Down
12 changes: 9 additions & 3 deletions src/storm-pomdp/analysis/FormulaInformation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ FormulaInformation::FormulaInformation() : type(Type::Unsupported) {

FormulaInformation::FormulaInformation(Type const& type, storm::solver::OptimizationDirection const& dir, std::optional<std::string> const& rewardModelName)
: type(type), optimizationDirection(dir), rewardModelName(rewardModelName) {
STORM_LOG_ASSERT(!this->rewardModelName.has_value() || this->type == Type::NonNestedExpectedRewardFormula,
"Got a reward model name for a non-reward formula.");
STORM_LOG_ASSERT(
!this->rewardModelName.has_value() || this->type == Type::NonNestedExpectedRewardFormula || this->type == Type::DiscountedTotalRewardFormula,
"Got a reward model name for a non-reward formula.");
}

FormulaInformation::Type const& FormulaInformation::getType() const {
Expand All @@ -41,6 +42,10 @@ bool FormulaInformation::isNonNestedExpectedRewardFormula() const {
return type == Type::NonNestedExpectedRewardFormula;
}

bool FormulaInformation::isDiscountedTotalRewardFormula() const {
return type == Type::DiscountedTotalRewardFormula;
}

bool FormulaInformation::isUnsupported() const {
return type == Type::Unsupported;
}
Expand All @@ -57,7 +62,8 @@ typename FormulaInformation::StateSet const& FormulaInformation::getSinkStates()
}

std::string const& FormulaInformation::getRewardModelName() const {
STORM_LOG_ASSERT(this->type == Type::NonNestedExpectedRewardFormula, "Reward model requested for unexpected formula type.");
STORM_LOG_ASSERT(this->type == Type::NonNestedExpectedRewardFormula || this->type == Type::DiscountedTotalRewardFormula,
"Reward model requested for unexpected formula type.");
return rewardModelName.value();
}

Expand Down
2 changes: 2 additions & 0 deletions src/storm-pomdp/analysis/FormulaInformation.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class FormulaInformation {
enum class Type {
NonNestedReachabilityProbability, // e.g. 'Pmax=? [F "target"]' or 'Pmin=? [!"sink" U "target"]'
NonNestedExpectedRewardFormula, // e.g. 'Rmin=? [F x>0 ]'
DiscountedTotalRewardFormula, // e.g. 'Rmax=? [Cdiscount=9/10}]'
Unsupported // The formula type is unsupported
};

Expand All @@ -37,6 +38,7 @@ class FormulaInformation {
Type const& getType() const;
bool isNonNestedReachabilityProbability() const;
bool isNonNestedExpectedRewardFormula() const;
bool isDiscountedTotalRewardFormula() const;
bool isUnsupported() const;
StateSet const& getTargetStates() const;
StateSet const& getSinkStates() const; // Shall not be called for reward formulas
Expand Down
8 changes: 8 additions & 0 deletions src/storm/logic/CloneVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,5 +169,13 @@ boost::any CloneVisitor::visit(HOAPathFormula const& f, boost::any const& data)
}
return std::static_pointer_cast<Formula>(result);
}

boost::any CloneVisitor::visit(DiscountedCumulativeRewardFormula const& f, boost::any const&) const {
return std::static_pointer_cast<Formula>(std::make_shared<DiscountedCumulativeRewardFormula>(f));
}

boost::any CloneVisitor::visit(DiscountedTotalRewardFormula const& f, boost::any const&) const {
return std::static_pointer_cast<Formula>(std::make_shared<DiscountedTotalRewardFormula>(f));
}
} // namespace logic
} // namespace storm
2 changes: 2 additions & 0 deletions src/storm/logic/CloneVisitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ class CloneVisitor : public FormulaVisitor {
virtual boost::any visit(UnaryBooleanPathFormula const& f, boost::any const& data) const override;
virtual boost::any visit(UntilFormula const& f, boost::any const& data) const override;
virtual boost::any visit(HOAPathFormula const& f, boost::any const& data) const override;
virtual boost::any visit(DiscountedCumulativeRewardFormula const& f, boost::any const& data) const override;
virtual boost::any visit(DiscountedTotalRewardFormula const& f, boost::any const& data) const override;
};

} // namespace logic
Expand Down
101 changes: 101 additions & 0 deletions src/storm/logic/DiscountedCumulativeRewardFormula.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
#include "storm/logic/DiscountedCumulativeRewardFormula.h"
#include <boost/any.hpp>
#include <ostream>
#include <utility>

#include "storm/adapters/RationalNumberAdapter.h"
#include "storm/logic/FormulaVisitor.h"

#include "storm/exceptions/InvalidOperationException.h"
#include "storm/exceptions/InvalidPropertyException.h"
#include "storm/utility/constants.h"
#include "storm/utility/macros.h"

namespace storm {
namespace logic {
DiscountedCumulativeRewardFormula::DiscountedCumulativeRewardFormula(storm::expressions::Expression const& discountFactor, TimeBound const& bound,
TimeBoundReference const& timeBoundReference,
boost::optional<RewardAccumulation> rewardAccumulation)
: CumulativeRewardFormula(bound, timeBoundReference, std::move(rewardAccumulation)), discountFactor(discountFactor) {
// Intentionally left empty.
}

DiscountedCumulativeRewardFormula::DiscountedCumulativeRewardFormula(storm::expressions::Expression const& discountFactor, std::vector<TimeBound> const& bounds,
std::vector<TimeBoundReference> const& timeBoundReferences,
boost::optional<RewardAccumulation> rewardAccumulation)
: CumulativeRewardFormula(bounds, timeBoundReferences, std::move(rewardAccumulation)), discountFactor(discountFactor) {
// Intentionally left empty.
}

bool DiscountedCumulativeRewardFormula::isDiscountedCumulativeRewardFormula() const {
return true;
}

void DiscountedCumulativeRewardFormula::checkNoVariablesInDiscountFactor(storm::expressions::Expression const& factor) {
STORM_LOG_THROW(!factor.containsVariables(), storm::exceptions::InvalidOperationException,
"Cannot evaluate discount factor '" << factor << "' as it contains undefined constants.");
}

storm::expressions::Expression const& DiscountedCumulativeRewardFormula::getDiscountFactor() const {
return discountFactor;
}

template<>
double DiscountedCumulativeRewardFormula::getDiscountFactor() const {
checkNoVariablesInDiscountFactor(discountFactor);
double value = discountFactor.evaluateAsDouble();
return value;
}

template<>
storm::RationalNumber DiscountedCumulativeRewardFormula::getDiscountFactor() const {
checkNoVariablesInDiscountFactor(discountFactor);
storm::RationalNumber value = discountFactor.evaluateAsRational();
return value;
}

std::ostream& DiscountedCumulativeRewardFormula::writeToStream(std::ostream& out, bool /*allowParentheses*/) const {
// No parentheses necessary
out << "Cdiscount=";
out << discountFactor;
if (hasRewardAccumulation()) {
out << "[" << getRewardAccumulation() << "]";
}
if (this->isMultiDimensional()) {
out << "^{";
}
for (unsigned i = 0; i < this->getDimension(); ++i) {
if (i > 0) {
out << ", ";
}
if (this->getTimeBoundReference(i).isRewardBound()) {
out << "rew";
if (this->getTimeBoundReference(i).hasRewardAccumulation()) {
out << "[" << this->getTimeBoundReference(i).getRewardAccumulation() << "]";
}
out << "{\"" << this->getTimeBoundReference(i).getRewardName() << "\"}";
} else if (this->getTimeBoundReference(i).isStepBound()) {
out << "steps";
//} else if (this->getTimeBoundReference(i).isStepBound())
// Note: the 'time' keyword is optional.
// out << "time";
}
if (this->isBoundStrict(i)) {
out << "<";
} else {
out << "<=";
}
out << this->getBound(i);
}
if (this->isMultiDimensional()) {
out << "}";
}
return out;
}

boost::any DiscountedCumulativeRewardFormula::accept(FormulaVisitor const& visitor, boost::any const& data) const {
return visitor.visit(*this, data);
}

} // namespace logic
} // namespace storm
38 changes: 38 additions & 0 deletions src/storm/logic/DiscountedCumulativeRewardFormula.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
#pragma once

#include "storm/logic/CumulativeRewardFormula.h"

#include "storm/logic/TimeBound.h"
#include "storm/logic/TimeBoundType.h"

namespace storm {
namespace logic {
class DiscountedCumulativeRewardFormula : public CumulativeRewardFormula {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This means that .isCumulativeRewardFormula() is also true forDiscountedCumulativeRewardFormulas.
Does this have any unintended effects? (e.g. places like multi-objective model checking, where the discounted formula would be treaded as if it were undiscounted)? Maybe it's safer to also override .isCumulativeRewardFormula() to false here, so things will rather fail instead of silently dropping the discount factor.

Also, this probably needs to override gatherUsedVariables to make sure that variables in the discount factor are catched.

Same for TotalRewardFormulas

public:
DiscountedCumulativeRewardFormula(storm::expressions::Expression const& discountFactor, TimeBound const& bound,
TimeBoundReference const& timeBoundReference = TimeBoundReference(TimeBoundType::Time),
boost::optional<RewardAccumulation> rewardAccumulation = boost::none);
DiscountedCumulativeRewardFormula(storm::expressions::Expression const& discountFactor, std::vector<TimeBound> const& bounds,
std::vector<TimeBoundReference> const& timeBoundReferences,
boost::optional<RewardAccumulation> rewardAccumulation = boost::none);

virtual ~DiscountedCumulativeRewardFormula() = default;

bool isDiscountedCumulativeRewardFormula() const override;

std::ostream& writeToStream(std::ostream& out, bool allowParentheses = false) const override;

storm::expressions::Expression const& getDiscountFactor() const;

template<typename ValueType>
ValueType getDiscountFactor() const;

virtual boost::any accept(FormulaVisitor const& visitor, boost::any const& data) const override;

private:
static void checkNoVariablesInDiscountFactor(storm::expressions::Expression const& factor);

storm::expressions::Expression const discountFactor;
};
} // namespace logic
} // namespace storm
63 changes: 63 additions & 0 deletions src/storm/logic/DiscountedTotalRewardFormula.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
#include "storm/logic/DiscountedTotalRewardFormula.h"
#include <boost/any.hpp>

#include <ostream>

#include "storm/adapters/RationalNumberAdapter.h"

#include "storm/exceptions/InvalidOperationException.h"
#include "storm/logic/FormulaVisitor.h"

#include "storm/utility/macros.h"

namespace storm {
namespace logic {
DiscountedTotalRewardFormula::DiscountedTotalRewardFormula(storm::expressions::Expression const discountFactor,
boost::optional<RewardAccumulation> rewardAccumulation)
: TotalRewardFormula(rewardAccumulation), discountFactor(discountFactor) {
// Intentionally left empty.
}

bool DiscountedTotalRewardFormula::isDiscountedTotalRewardFormula() const {
return true;
}

std::ostream& DiscountedTotalRewardFormula::writeToStream(std::ostream& out, bool /* allowParentheses */) const {
// No parentheses necessary
out << "Cdiscount=";
out << discountFactor.toString();
if (hasRewardAccumulation()) {
out << "[" << getRewardAccumulation() << "]";
}
return out;
}

storm::expressions::Expression const& DiscountedTotalRewardFormula::getDiscountFactor() const {
return discountFactor;
}

template<>
double DiscountedTotalRewardFormula::getDiscountFactor() const {
checkNoVariablesInDiscountFactor(discountFactor);
double value = discountFactor.evaluateAsDouble();
return value;
}

template<>
storm::RationalNumber DiscountedTotalRewardFormula::getDiscountFactor() const {
checkNoVariablesInDiscountFactor(discountFactor);
storm::RationalNumber value = discountFactor.evaluateAsRational();
return value;
}

void DiscountedTotalRewardFormula::checkNoVariablesInDiscountFactor(storm::expressions::Expression const& factor) {
STORM_LOG_THROW(!factor.containsVariables(), storm::exceptions::InvalidOperationException,
"Cannot evaluate discount factor '" << factor << "' as it contains undefined constants.");
}

boost::any DiscountedTotalRewardFormula::accept(FormulaVisitor const& visitor, boost::any const& data) const {
return visitor.visit(*this, data);
}

} // namespace logic
} // namespace storm
Loading
Loading