Skip to content

Commit

Permalink
Fixes #1 by not modifying the param groups of the optimizer
Browse files Browse the repository at this point in the history
  • Loading branch information
fedorsc committed Jan 18, 2021
1 parent 8080ddd commit 4d60ce6
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 11 deletions.
6 changes: 3 additions & 3 deletions doc/source/examples.rst
Original file line number Diff line number Diff line change
Expand Up @@ -189,9 +189,9 @@ To look into the context and policy of the last time step you can do:
.. code-block:: python
>>> print(context) # doctest: +ELLIPSIS
tensor([[[8.3..., 8.6...]]], requires_grad=True)
tensor([[[7.8..., 9.1...]]], requires_grad=True)
>>> print(policy) # doctest: +ELLIPSIS
tensor([[[-2..., -2..., 5..., 5...]],
tensor([[[ 6..., -7..., -6..., 7...]],
...
[[-2..., -1..., 4..., 5...]]], grad_fn=<CloneBackward>)
... [[ 4..., -7..., -6..., 7...]]], grad_fn=<CloneBackward>)
18 changes: 10 additions & 8 deletions reprise/context_inference.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,13 @@ def __init__(
self._model_inputs = []
self._observations = []

self._model_state = initial_model_state
for s in self._opt_accessor(self._model_state):
s.requires_grad_()

assert (len(self._opt_accessor(self._model_state)) ==
len(self._optimizer.param_groups[1]['params']))

def predict(self, state):
"""
Predict from the past.
Expand Down Expand Up @@ -149,13 +156,6 @@ def infer_contexts(self, model_input, observation):
self._observations.append(observation)
self._observations = self._observations[-self._inference_length:]

for _ in self._opt_accessor(self._model_state):
self._optimizer.param_groups[1]['params'].pop(-1)

for o in self._opt_accessor(self._model_state):
o.requires_grad_()
self._optimizer.param_groups[1]['params'].append(o)

# Perform context inference cycles
for _ in range(self._inference_cycles):
self._optimizer.zero_grad()
Expand All @@ -177,6 +177,8 @@ def infer_contexts(self, model_input, observation):
# the final output and state to be returned
with torch.no_grad():
outputs, states = self.predict(self._model_state)
self._model_state = states[0]
for i in range(len(self._model_state)):
for j in range(len(self._model_state[i])):
self._model_state[i][j].data = states[0][i][j].data

return self._context, outputs, states
Binary file modified tests/references/test_reprise_actions.npy
Binary file not shown.
Binary file modified tests/references/test_reprise_contexts.npy
Binary file not shown.

0 comments on commit 4d60ce6

Please sign in to comment.