Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for discounted properties in MDPs and DTMCs #621

Open
wants to merge 9 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions resources/examples/testfiles/mdp/small_discount.nm
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
mdp

module discounting

s : [0..3] init 0;
[a] s=0 -> (s'=1);
[b] s=0 -> (s'=2);
[] s=1 -> (s'=3);
[] s=2 -> 0.8 : (s'=2) + 0.2 : (s'=3);
[] s=3 -> true;

endmodule

rewards
[] s=1 : 4;
[] s=2 : 1;
endrewards
35 changes: 34 additions & 1 deletion src/storm-parsers/parser/FormulaParserGrammar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,8 @@ void FormulaParserGrammar::initialize() {
operatorSubFormula =
(((qi::eps(qi::_r1 == storm::logic::FormulaContext::Probability) > formula(FormulaKind::Path, qi::_r1)) |
(qi::eps(qi::_r1 == storm::logic::FormulaContext::Reward) >
(longRunAverageRewardFormula | eventuallyFormula(qi::_r1) | cumulativeRewardFormula | instantaneousRewardFormula | totalRewardFormula)) |
(longRunAverageRewardFormula | eventuallyFormula(qi::_r1) | discountedCumulativeRewardFormula | discountedTotalRewardFormula |
cumulativeRewardFormula | instantaneousRewardFormula | totalRewardFormula)) |
(qi::eps(qi::_r1 == storm::logic::FormulaContext::Time) > eventuallyFormula(qi::_r1)) |
(qi::eps(qi::_r1 == storm::logic::FormulaContext::LongRunAverage) > formula(FormulaKind::State, storm::logic::FormulaContext::LongRunAverage))) >>
-(qi::lit("||") >
Expand Down Expand Up @@ -174,6 +175,14 @@ void FormulaParserGrammar::initialize() {
pathFormula.name("path formula");

// Quantitative path formulae (reward)
discountedCumulativeRewardFormula =
(qi::lit("Cdiscount=") >> expressionParser >>
timeBounds)[qi::_val = phoenix::bind(&FormulaParserGrammar::createDiscountedCumulativeRewardFormula, phoenix::ref(*this), qi::_1, qi::_2)];
discountedCumulativeRewardFormula.name("discounted cumulative reward formula");
discountedTotalRewardFormula =
(qi::lit("Cdiscount=") >>
expressionParser)[qi::_val = phoenix::bind(&FormulaParserGrammar::createDiscountedTotalRewardFormula, phoenix::ref(*this), qi::_1)];
discountedTotalRewardFormula.name("discounted total reward formula");
longRunAverageRewardFormula = (qi::lit("LRA") | qi::lit("S") |
qi::lit("MP"))[qi::_val = phoenix::bind(&FormulaParserGrammar::createLongRunAverageRewardFormula, phoenix::ref(*this))];
longRunAverageRewardFormula.name("long run average reward formula");
Expand Down Expand Up @@ -278,6 +287,8 @@ void FormulaParserGrammar::initialize() {
// debug(filterProperty)
// debug(constantDefinition )
// debug(start)
// debug(discountedCumulativeRewardFormula)
// debug(discountedTotalRewardFormula)

// Enable error reporting.
qi::on_error<qi::fail>(rewardModelName, handler(qi::_1, qi::_2, qi::_3, qi::_4));
Expand Down Expand Up @@ -305,6 +316,8 @@ void FormulaParserGrammar::initialize() {
qi::on_error<qi::fail>(instantaneousRewardFormula, handler(qi::_1, qi::_2, qi::_3, qi::_4));
qi::on_error<qi::fail>(cumulativeRewardFormula, handler(qi::_1, qi::_2, qi::_3, qi::_4));
qi::on_error<qi::fail>(totalRewardFormula, handler(qi::_1, qi::_2, qi::_3, qi::_4));
qi::on_error<qi::fail>(discountedCumulativeRewardFormula, handler(qi::_1, qi::_2, qi::_3, qi::_4));
qi::on_error<qi::fail>(discountedTotalRewardFormula, handler(qi::_1, qi::_2, qi::_3, qi::_4));
qi::on_error<qi::fail>(playerCoalition, handler(qi::_1, qi::_2, qi::_3, qi::_4));
qi::on_error<qi::fail>(gameFormula, handler(qi::_1, qi::_2, qi::_3, qi::_4));
qi::on_error<qi::fail>(multiOperatorFormula, handler(qi::_1, qi::_2, qi::_3, qi::_4));
Expand Down Expand Up @@ -402,10 +415,30 @@ std::shared_ptr<storm::logic::Formula const> FormulaParserGrammar::createCumulat
return std::shared_ptr<storm::logic::Formula const>(new storm::logic::CumulativeRewardFormula(bounds, timeBoundReferences));
}

std::shared_ptr<storm::logic::Formula const> FormulaParserGrammar::createDiscountedCumulativeRewardFormula(
storm::expressions::Expression const& discountFactor,
std::vector<std::tuple<boost::optional<storm::logic::TimeBound>, boost::optional<storm::logic::TimeBound>,
std::shared_ptr<storm::logic::TimeBoundReference>>> const& timeBounds) const {
std::vector<storm::logic::TimeBound> bounds;
std::vector<storm::logic::TimeBoundReference> timeBoundReferences;
for (auto const& timeBound : timeBounds) {
STORM_LOG_THROW(!std::get<0>(timeBound), storm::exceptions::WrongFormatException, "Cumulative reward formulas with lower time bound are not allowed.");
STORM_LOG_THROW(std::get<1>(timeBound), storm::exceptions::WrongFormatException, "Cumulative reward formulas require an upper bound.");
bounds.push_back(std::get<1>(timeBound).get());
timeBoundReferences.emplace_back(*std::get<2>(timeBound));
}
return std::shared_ptr<storm::logic::Formula const>(new storm::logic::DiscountedCumulativeRewardFormula(discountFactor, bounds, timeBoundReferences));
}

std::shared_ptr<storm::logic::Formula const> FormulaParserGrammar::createTotalRewardFormula() const {
return std::shared_ptr<storm::logic::Formula const>(new storm::logic::TotalRewardFormula());
}

std::shared_ptr<storm::logic::Formula const> FormulaParserGrammar::createDiscountedTotalRewardFormula(
storm::expressions::Expression const& discountFactor) const {
return std::shared_ptr<storm::logic::Formula const>(new storm::logic::DiscountedTotalRewardFormula(discountFactor));
}

std::shared_ptr<storm::logic::Formula const> FormulaParserGrammar::createLongRunAverageRewardFormula() const {
return std::shared_ptr<storm::logic::Formula const>(new storm::logic::LongRunAverageRewardFormula());
}
Expand Down
7 changes: 7 additions & 0 deletions src/storm-parsers/parser/FormulaParserGrammar.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ class FormulaParserGrammar : public qi::grammar<Iterator, std::vector<storm::jan
qi::rule<Iterator, std::shared_ptr<storm::logic::Formula const>(), Skipper> instantaneousRewardFormula;
qi::rule<Iterator, std::shared_ptr<storm::logic::Formula const>(), Skipper> cumulativeRewardFormula;
qi::rule<Iterator, std::shared_ptr<storm::logic::Formula const>(), Skipper> totalRewardFormula;
qi::rule<Iterator, std::shared_ptr<storm::logic::Formula const>(), Skipper> discountedCumulativeRewardFormula;
qi::rule<Iterator, std::shared_ptr<storm::logic::Formula const>(), Skipper> discountedTotalRewardFormula;

// Game Formulae
qi::rule<Iterator, storm::logic::PlayerCoalition(), qi::locals<std::vector<std::variant<std::string, storm::storage::PlayerIndex>>>, Skipper>
Expand Down Expand Up @@ -218,7 +220,12 @@ class FormulaParserGrammar : public qi::grammar<Iterator, std::vector<storm::jan
std::shared_ptr<storm::logic::Formula const> createCumulativeRewardFormula(
std::vector<std::tuple<boost::optional<storm::logic::TimeBound>, boost::optional<storm::logic::TimeBound>,
std::shared_ptr<storm::logic::TimeBoundReference>>> const& timeBounds) const;
std::shared_ptr<storm::logic::Formula const> createDiscountedCumulativeRewardFormula(
storm::expressions::Expression const& discountFactor,
std::vector<std::tuple<boost::optional<storm::logic::TimeBound>, boost::optional<storm::logic::TimeBound>,
std::shared_ptr<storm::logic::TimeBoundReference>>> const& timeBounds) const;
std::shared_ptr<storm::logic::Formula const> createTotalRewardFormula() const;
std::shared_ptr<storm::logic::Formula const> createDiscountedTotalRewardFormula(storm::expressions::Expression const& discountFactor) const;
std::shared_ptr<storm::logic::Formula const> createLongRunAverageRewardFormula() const;
std::shared_ptr<storm::logic::Formula const> createAtomicExpressionFormula(storm::expressions::Expression const& expression) const;
std::shared_ptr<storm::logic::Formula const> createBooleanLiteralFormula(bool literal) const;
Expand Down
12 changes: 9 additions & 3 deletions src/storm-pomdp/analysis/FormulaInformation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,9 @@ FormulaInformation::FormulaInformation() : type(Type::Unsupported) {

FormulaInformation::FormulaInformation(Type const& type, storm::solver::OptimizationDirection const& dir, std::optional<std::string> const& rewardModelName)
: type(type), optimizationDirection(dir), rewardModelName(rewardModelName) {
STORM_LOG_ASSERT(!this->rewardModelName.has_value() || this->type == Type::NonNestedExpectedRewardFormula,
"Got a reward model name for a non-reward formula.");
STORM_LOG_ASSERT(
!this->rewardModelName.has_value() || this->type == Type::NonNestedExpectedRewardFormula || this->type == Type::DiscountedTotalRewardFormula,
"Got a reward model name for a non-reward formula.");
}

FormulaInformation::Type const& FormulaInformation::getType() const {
Expand All @@ -41,6 +42,10 @@ bool FormulaInformation::isNonNestedExpectedRewardFormula() const {
return type == Type::NonNestedExpectedRewardFormula;
}

bool FormulaInformation::isDiscountedTotalRewardFormula() const {
return type == Type::DiscountedTotalRewardFormula;
}

bool FormulaInformation::isUnsupported() const {
return type == Type::Unsupported;
}
Expand All @@ -57,7 +62,8 @@ typename FormulaInformation::StateSet const& FormulaInformation::getSinkStates()
}

std::string const& FormulaInformation::getRewardModelName() const {
STORM_LOG_ASSERT(this->type == Type::NonNestedExpectedRewardFormula, "Reward model requested for unexpected formula type.");
STORM_LOG_ASSERT(this->type == Type::NonNestedExpectedRewardFormula || this->type == Type::DiscountedTotalRewardFormula,
"Reward model requested for unexpected formula type.");
return rewardModelName.value();
}

Expand Down
2 changes: 2 additions & 0 deletions src/storm-pomdp/analysis/FormulaInformation.h
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ class FormulaInformation {
enum class Type {
NonNestedReachabilityProbability, // e.g. 'Pmax=? [F "target"]' or 'Pmin=? [!"sink" U "target"]'
NonNestedExpectedRewardFormula, // e.g. 'Rmin=? [F x>0 ]'
DiscountedTotalRewardFormula, // e.g. 'Rmax=? [Cdiscount=9/10}]'
Unsupported // The formula type is unsupported
};

Expand All @@ -37,6 +38,7 @@ class FormulaInformation {
Type const& getType() const;
bool isNonNestedReachabilityProbability() const;
bool isNonNestedExpectedRewardFormula() const;
bool isDiscountedTotalRewardFormula() const;
bool isUnsupported() const;
StateSet const& getTargetStates() const;
StateSet const& getSinkStates() const; // Shall not be called for reward formulas
Expand Down
8 changes: 8 additions & 0 deletions src/storm/logic/CloneVisitor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -169,5 +169,13 @@ boost::any CloneVisitor::visit(HOAPathFormula const& f, boost::any const& data)
}
return std::static_pointer_cast<Formula>(result);
}

boost::any CloneVisitor::visit(DiscountedCumulativeRewardFormula const& f, boost::any const&) const {
return std::static_pointer_cast<Formula>(std::make_shared<DiscountedCumulativeRewardFormula>(f));
}

boost::any CloneVisitor::visit(DiscountedTotalRewardFormula const& f, boost::any const&) const {
return std::static_pointer_cast<Formula>(std::make_shared<DiscountedTotalRewardFormula>(f));
}
} // namespace logic
} // namespace storm
2 changes: 2 additions & 0 deletions src/storm/logic/CloneVisitor.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ class CloneVisitor : public FormulaVisitor {
virtual boost::any visit(UnaryBooleanPathFormula const& f, boost::any const& data) const override;
virtual boost::any visit(UntilFormula const& f, boost::any const& data) const override;
virtual boost::any visit(HOAPathFormula const& f, boost::any const& data) const override;
virtual boost::any visit(DiscountedCumulativeRewardFormula const& f, boost::any const& data) const override;
virtual boost::any visit(DiscountedTotalRewardFormula const& f, boost::any const& data) const override;
};

} // namespace logic
Expand Down
Loading
Loading