Fix strategy synthesis with implicit sink state

Zinoex committed on Jan 21, 2025 · 5833bca
1 parent 5b44edb
commit 5833bca
Show file tree

Hide file tree

Showing 6 changed files with 48 additions and 2 deletions.
diff --git a/Project.toml b/Project.toml
@@ -1,7 +1,7 @@
 name = "IntervalMDP"
 uuid = "051c988a-e73c-45a4-90ec-875cac0402c7"
 authors = ["Frederik Baymler Mathiesen <[email protected]> and contributors"]
-version = "0.4.2"
+version = "0.4.3"
 
 [deps]
 JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6"

diff --git a/src/models/IntervalMarkovDecisionProcess.jl b/src/models/IntervalMarkovDecisionProcess.jl
@@ -182,3 +182,4 @@ stateptr(mdp::IntervalMarkovDecisionProcess) = mdp.stateptr
 max_actions(mdp::IntervalMarkovDecisionProcess) = maxdiff(stateptr(mdp))
 Base.ndims(::IntervalMarkovDecisionProcess) = one(Int32)
 product_num_states(mp::IntervalMarkovDecisionProcess) = (num_states(mp),)
+source_shape(mp::IntervalMarkovDecisionProcess) = (length(stateptr(mp)) - 1,)
diff --git a/src/models/MixtureIntervalMarkovDecisionProcess.jl b/src/models/MixtureIntervalMarkovDecisionProcess.jl
@@ -165,3 +165,4 @@ max_actions(mdp::MixtureIntervalMarkovDecisionProcess) = maxdiff(stateptr(mdp))
 Base.ndims(::MixtureIntervalMarkovDecisionProcess{N}) where {N} = Int32(N)
 product_num_states(mp::MixtureIntervalMarkovDecisionProcess) =
     num_target(transition_prob(mp))
+source_shape(mp::MixtureIntervalMarkovDecisionProcess) = source_shape(transition_prob(mp))
diff --git a/src/models/OrthogonalIntervalMarkovDecisionProcess.jl b/src/models/OrthogonalIntervalMarkovDecisionProcess.jl
@@ -142,3 +142,4 @@ max_actions(mdp::OrthogonalIntervalMarkovDecisionProcess) = maxdiff(stateptr(mdp
 Base.ndims(::OrthogonalIntervalMarkovDecisionProcess{N}) where {N} = Int32(N)
 product_num_states(mp::OrthogonalIntervalMarkovDecisionProcess) =
     num_target(transition_prob(mp))
+source_shape(mp::OrthogonalIntervalMarkovDecisionProcess) = source_shape(transition_prob(mp))
diff --git a/src/strategy.jl b/src/strategy.jl
@@ -123,7 +123,7 @@ depends on the configuration and the device to store the strategy depends on the
 function construct_strategy_cache end
 
 construct_strategy_cache(mp::IntervalMarkovProcess, config, strategy = NoStrategy()) =
-    construct_strategy_cache(mp, config, strategy, product_num_states(mp))
+    construct_strategy_cache(mp, config, strategy, source_shape(mp))
 
 # Strategy cache for applying given policies - useful for dispatching
 struct GivenStrategyCache{S <: AbstractStrategy} <: NonOptimizingStrategyCache

diff --git a/test/base/synthesis.jl b/test/base/synthesis.jl
@@ -107,3 +107,46 @@ end
 
 # The last time step (aka. the first value iteration step) has a different strategy.
 @test policy[time_length(policy)] == [2, 1, 1]
+
+@testset "implicit sink state" begin
+    prob1 = IntervalProbabilities(;
+        lower = [
+            0.0 0.5
+            0.1 0.3
+            0.2 0.1
+        ],
+        upper = [
+            0.5 0.7
+            0.6 0.5
+            0.7 0.3
+        ],
+    )
+
+    prob2 = IntervalProbabilities(;
+        lower = [
+            0.1 0.2
+            0.2 0.3
+            0.3 0.4
+        ],
+        upper = [
+            0.6 0.6
+            0.5 0.5
+            0.4 0.4
+        ],
+    )
+
+    transition_probs = [prob1, prob2]
+    mdp = IntervalMarkovDecisionProcess(transition_probs)
+
+    # Finite time reachability
+    prop = FiniteTimeReachability([3], 10)
+    spec = Specification(prop, Pessimistic, Maximize)
+    problem = Problem(mdp, spec)
+    policy, V, k, res = control_synthesis(problem)
+
+    @test policy isa TimeVaryingStrategy
+    @test time_length(policy) == 10
+    for k in 1:time_length(policy)
+        @test policy[k] == [1, 2]
+    end
+end