diff --git a/axelrod/data/all_classifiers.yml b/axelrod/data/all_classifiers.yml index ea52f2351..fb0b6a51c 100644 --- a/axelrod/data/all_classifiers.yml +++ b/axelrod/data/all_classifiers.yml @@ -1,4 +1,4 @@ -$\phi$: +$\\phi$: inspects_source: false long_run_time: false makes_use_of: !!set {} @@ -6,7 +6,7 @@ $\phi$: manipulates_state: false memory_depth: .inf stochastic: false -$\pi$: +$\\pi$: inspects_source: false long_run_time: false makes_use_of: !!set {} @@ -439,6 +439,14 @@ Evolved ANN 5 Noise 05: manipulates_state: false memory_depth: .inf stochastic: false +EvolvedAttention: + inspects_source: false + long_run_time: True + makes_use_of: !!set {} + manipulates_source: false + manipulates_state: false + memory_depth: 200 + stochastic: false Evolved FSM 16: inspects_source: false long_run_time: false diff --git a/axelrod/data/model_attention.pth b/axelrod/data/model_attention.pth new file mode 100644 index 000000000..72ed07093 Binary files /dev/null and b/axelrod/data/model_attention.pth differ diff --git a/axelrod/load_data_.py b/axelrod/load_data_.py index 58ab40c46..dfe9d1a49 100644 --- a/axelrod/load_data_.py +++ b/axelrod/load_data_.py @@ -2,6 +2,8 @@ import pkgutil from typing import Callable, Dict, List, Optional, Tuple +import torch + def axl_filename(path: pathlib.Path) -> pathlib.Path: """Given a path under Axelrod/, return absolute filepath. @@ -77,3 +79,12 @@ def load_pso_tables(filename="pso_gambler.csv", directory="data"): values = list(map(float, row[4:])) d[(name, int(a), int(b), int(c))] = values return d + + +def load_attention_model_weights( + filename="model_attention.pth", directory="axelrod/data" +): + """Load attention model weights.""" + path = str(axl_filename(pathlib.Path(directory) / filename)) + weights = torch.load(path, map_location=torch.device("cpu")) + return weights diff --git a/axelrod/strategies/_strategies.py b/axelrod/strategies/_strategies.py index a209664c2..bc80eeccc 100644 --- a/axelrod/strategies/_strategies.py +++ b/axelrod/strategies/_strategies.py @@ -30,6 +30,7 @@ from .ann import ANN, EvolvableANN # pylint: disable=unused-import from .apavlov import APavlov2006, APavlov2011 from .appeaser import Appeaser +from .attention import EvolvedAttention from .averagecopier import AverageCopier, NiceAverageCopier from .axelrod_first import ( FirstByDavis, @@ -348,6 +349,7 @@ EvolvedHMM5, EvolvedLookerUp1_1_1, EvolvedLookerUp2_2_2, + EvolvedAttention, FirmButFair, FirstByAnonymous, FirstByDavis, diff --git a/axelrod/strategies/attention.py b/axelrod/strategies/attention.py new file mode 100644 index 000000000..301ea1d72 --- /dev/null +++ b/axelrod/strategies/attention.py @@ -0,0 +1,373 @@ +import copy +from enum import IntEnum +from typing import Optional, Tuple + +import torch +from torch import nn + +from axelrod.action import Action +from axelrod.load_data_ import load_attention_model_weights +from axelrod.player import Player + +C, D = Action.C, Action.D + +MEMORY_LENGTH = 200 + +CLS_TOKEN = 0 +PAD_TOKEN = 1 + +DEVICES = torch.device("cpu") + +model_weights = load_attention_model_weights() + + +class GameState(IntEnum): + CooperateDefect = 2 + DefectCooperate = 3 + CooperateCooperate = 4 + DefectDefect = 5 + + +def actions_to_game_state( + player_action: Action, opponent_action: Action +) -> GameState: + action_mapping = { + (C, D): GameState.CooperateDefect, + (D, C): GameState.DefectCooperate, + (C, C): GameState.CooperateCooperate, + (D, D): GameState.DefectDefect, + } + return action_mapping[(player_action, opponent_action)] + + +def 
compute_features( + player: Player, opponent: Player, right_pad: bool = False +) -> torch.IntTensor: + # The first token is the CLS token + player_history = player.history[-MEMORY_LENGTH:] + player_history = player_history[::-1] + opponent_history = opponent.history[-MEMORY_LENGTH:] + opponent_history = opponent_history[::-1] + + feature_size = MEMORY_LENGTH + 1 if right_pad else len(player_history) + 1 + + game_history = torch.full((feature_size,), PAD_TOKEN, dtype=torch.int) + game_history[0] = CLS_TOKEN + for index, (action_player, action_opponent) in enumerate( + zip(player_history, opponent_history) + ): + game_state = actions_to_game_state(action_player, action_opponent) + game_history[index + 1] = game_state + return game_history + + +class GELUActivation(nn.Module): + def __init__(self): + super().__init__() + + def forward(self, input): + return nn.functional.gelu(input) + + +class PlayerConfig: + def __init__( + self, + state_size=6, # Number of possible game states, 4 possible game states and 2 specials token + hidden_size=256, + num_hidden_layers=24, + num_attention_heads=8, + intermediate_size=512, + hidden_dropout_prob=0.3, + attention_probs_dropout_prob=0.3, + max_game_size=MEMORY_LENGTH + 1, # Add 1 for the CLS token + initializer_range=0.02, + layer_norm_eps=1e-12, + ): + self.state_size = state_size + self.hidden_size = hidden_size + self.num_hidden_layers = num_hidden_layers + self.num_attention_heads = num_attention_heads + self.intermediate_size = intermediate_size + self.hidden_dropout_prob = hidden_dropout_prob + self.attention_probs_dropout_prob = attention_probs_dropout_prob + self.max_game_size = max_game_size + self.initializer_range = initializer_range + self.layer_norm_eps = layer_norm_eps + + +class PlayerEmbeddings(nn.Module): + """Construct the embeddings from game state and position embeddings.""" + + def __init__(self, config: PlayerConfig): + super().__init__() + self.game_state_embeddings = nn.Embedding( + config.state_size, config.hidden_size + ) + self.position_embeddings = nn.Embedding( + config.max_game_size, config.hidden_size + ) + self.LayerNorm = nn.LayerNorm( + config.hidden_size, eps=config.layer_norm_eps + ) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + self.register_buffer( + "position_ids", + torch.arange(config.max_game_size).expand((1, -1)), + persistent=False, + ) + + def forward( + self, + input_ids: torch.LongTensor, + ) -> Tuple[torch.Tensor, torch.Tensor]: + input_shape = input_ids.size() + seq_length = input_shape[1] + position_ids = self.position_ids[:, 0:seq_length] + embeddings = self.game_state_embeddings(input_ids) + position_embeddings = self.position_embeddings(position_ids) + embeddings += position_embeddings + embeddings = self.LayerNorm(embeddings) + embeddings = self.dropout(embeddings) + + attention_mask = (input_ids != PAD_TOKEN).long() + + return embeddings, attention_mask + + +class PlayerSelfAttention(nn.Module): + def __init__(self, config: PlayerConfig): + super().__init__() + self.num_attention_heads = config.num_attention_heads + self.attention_head_size = int( + config.hidden_size / config.num_attention_heads + ) + self.all_head_size = self.num_attention_heads * self.attention_head_size + + self.query = nn.Linear(config.hidden_size, self.all_head_size) + self.key = nn.Linear(config.hidden_size, self.all_head_size) + self.value = nn.Linear(config.hidden_size, self.all_head_size) + + self.dropout_prob = config.attention_probs_dropout_prob + + def _transpose_for_scores(self, x: torch.Tensor) -> 
torch.Tensor: + new_x_shape = x.size()[:-1] + ( + self.num_attention_heads, + self.attention_head_size, + ) + x = x.view(new_x_shape) + return x.permute(0, 2, 1, 3) + + @staticmethod + def _expand_mask(mask: torch.Tensor, dtype: torch.dtype) -> torch.Tensor: + """ + Expands attention_mask from `[bsz, seq_len]` to `[bsz, 1, tgt_seq_len, src_seq_len]`. + """ + bsz, src_len = mask.size() + tgt_len = src_len + + expanded_mask = ( + mask[:, None, None, :].expand(bsz, 1, tgt_len, src_len).to(dtype) + ) + + inverted_mask = 1.0 - expanded_mask + + return inverted_mask.masked_fill( + inverted_mask.to(torch.bool), torch.finfo(dtype).min + ) + + def forward( + self, hidden_states: torch.Tensor, attention_mask: torch.Tensor + ) -> torch.Tensor: + bsz, tgt_len, _ = hidden_states.size() + query_layer = self._transpose_for_scores(self.query(hidden_states)) + key_layer = self._transpose_for_scores(self.key(hidden_states)) + value_layer = self._transpose_for_scores(self.value(hidden_states)) + + attn_mask = self._expand_mask(attention_mask, query_layer.dtype) + + attn_output = torch.nn.functional.scaled_dot_product_attention( + query_layer, + key_layer, + value_layer, + dropout_p=self.dropout_prob if self.training else 0.0, + attn_mask=attn_mask, + ) + + attn_output = attn_output.transpose(1, 2) + attn_output = attn_output.reshape(bsz, tgt_len, self.all_head_size) + return attn_output + + +class PlayerSelfOutput(nn.Module): + def __init__(self, config: PlayerConfig): + super().__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.LayerNorm = nn.LayerNorm( + config.hidden_size, eps=config.layer_norm_eps + ) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward( + self, hidden_states: torch.Tensor, input_tensor: torch.Tensor + ) -> torch.Tensor: + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class PlayerAttention(nn.Module): + def __init__(self, config: PlayerConfig): + super().__init__() + self.self = PlayerSelfAttention(config) + self.output = PlayerSelfOutput(config) + + def forward( + self, hidden_states: torch.Tensor, attention_mask: torch.Tensor + ) -> torch.Tensor: + self_outputs = self.self(hidden_states, attention_mask) + attention_output = self.output(self_outputs, hidden_states) + return attention_output + + +class PlayerIntermediate(nn.Module): + def __init__(self, config: PlayerConfig): + super().__init__() + self.dense = nn.Linear(config.hidden_size, config.intermediate_size) + self.intermediate_act_fn = GELUActivation() + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + hidden_states = self.dense(hidden_states) + hidden_states = self.intermediate_act_fn(hidden_states) + return hidden_states + + +class PlayerOutput(nn.Module): + def __init__(self, config: PlayerConfig): + super().__init__() + self.dense = nn.Linear(config.intermediate_size, config.hidden_size) + self.LayerNorm = nn.LayerNorm( + config.hidden_size, eps=config.layer_norm_eps + ) + self.dropout = nn.Dropout(config.hidden_dropout_prob) + + def forward( + self, hidden_states: torch.Tensor, input_tensor: torch.Tensor + ) -> torch.Tensor: + hidden_states = self.dense(hidden_states) + hidden_states = self.dropout(hidden_states) + hidden_states = self.LayerNorm(hidden_states + input_tensor) + return hidden_states + + +class PlayerLayer(nn.Module): + def __init__(self, config: PlayerConfig): + super().__init__() + self.seq_len_dim = 1 + 
self.attention = PlayerAttention(config) + self.intermediate = PlayerIntermediate(config) + self.output = PlayerOutput(config) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: torch.Tensor, + ) -> torch.Tensor: + + attention_output = self.attention(hidden_states, attention_mask) + intermediate_output = self.intermediate(attention_output) + layer_output = self.output(intermediate_output, attention_output) + return layer_output + + +class PlayerEncoder(nn.Module): + def __init__(self, config: PlayerConfig): + super().__init__() + self.layer = nn.ModuleList( + [PlayerLayer(config) for _ in range(config.num_hidden_layers)] + ) + + def forward( + self, + hidden_states: torch.Tensor, + attention_mask: torch.Tensor, + ) -> torch.Tensor: + + for layer_module in self.layer: + hidden_states = layer_module(hidden_states, attention_mask) + return hidden_states + + +class PlayerPooler(nn.Module): + def __init__(self, config: PlayerConfig): + super().__init__() + self.dense = nn.Linear(config.hidden_size, config.hidden_size) + self.activation = nn.Tanh() + + def forward(self, hidden_states: torch.Tensor) -> torch.Tensor: + first_token_tensor = hidden_states[:, 0] + pooled_output = self.dense(first_token_tensor) + pooled_output = self.activation(pooled_output) + return pooled_output + + +class PlayerModel(nn.Module): + _no_split_modules = ["PlayerEmbeddings"] + + def __init__(self, config: PlayerConfig): + super().__init__() + self.config = config + self.embeddings = PlayerEmbeddings(config) + self.encoder = PlayerEncoder(config) + self.pooler = PlayerPooler(config) + + self.action = nn.Linear(config.hidden_size, 1) + + def forward(self, input_ids: torch.Tensor) -> torch.Tensor: + embedding_output, attention_mask = self.embeddings(input_ids=input_ids) + sequence_output = self.encoder(embedding_output, attention_mask) + pooled_output = self.pooler(sequence_output) + return self.action(pooled_output) + + def __eq__(self, other: object) -> bool: + return isinstance(other, PlayerModel) + + +class EvolvedAttention(Player): + """A player who uses an attention mechanism to analyse the game. Trained with self-play. + + Names: + - EvolvedAttention: EvolvedAttention by Marc-Olivier Derouin + """ + + name = "EvolvedAttention" + classifier = { + "memory_depth": MEMORY_LENGTH, + "stochastic": False, + "long_run_time": True, + "inspects_source": False, + "manipulates_source": False, + "manipulates_state": False, + } + + def __init__( + self, + ) -> None: + super().__init__() + self.model = PlayerModel(PlayerConfig()) + self.model.load_state_dict(model_weights) + self.model.to(DEVICES) + self.model.eval() + + def strategy(self, opponent: Player) -> Action: + """Actual strategy definition that determines player's action.""" + # Compute features + features = compute_features(self, opponent).unsqueeze(0).to(DEVICES) + + # Get action from the model + logits = self.model(features) + + # Apply sigmoid + logits = torch.sigmoid(logits) + + return C if logits.item() < 0.5 else D diff --git a/axelrod/strategies/axelrod_second.py b/axelrod/strategies/axelrod_second.py index 784eb90d2..9b6b181aa 100644 --- a/axelrod/strategies/axelrod_second.py +++ b/axelrod/strategies/axelrod_second.py @@ -441,16 +441,16 @@ class SecondByGrofman(Player): 1. First it cooperates on the first two rounds 2. For rounds 3-7 inclusive, it plays the same as the opponent's last move 3. Thereafter, it applies the following logic, looking at its memory of the - last 8\* rounds (ignoring the most recent round). 
+ last 8\\* rounds (ignoring the most recent round). - If its own previous move was C and the opponent has defected less than - 3 times in the last 8\* rounds, cooperate + 3 times in the last 8\\* rounds, cooperate - If its own previous move was C and the opponent has defected 3 or - more times in the last 8\* rounds, defect + more times in the last 8\\* rounds, defect - If its own previous move was D and the opponent has defected only once - or not at all in the last 8\* rounds, cooperate + or not at all in the last 8\\* rounds, cooperate - If its own previous move was D and the opponent has defected more than - once in the last 8\* rounds, defect + once in the last 8\\* rounds, defect The code looks at the first 7 of the last 8 rounds, ignoring the most recent round. diff --git a/axelrod/strategies/mathematicalconstants.py b/axelrod/strategies/mathematicalconstants.py index 5d7013256..9ddfbfc4d 100644 --- a/axelrod/strategies/mathematicalconstants.py +++ b/axelrod/strategies/mathematicalconstants.py @@ -49,7 +49,7 @@ class Golden(CotoDeRatio): - Golden: Original Name by Timothy Standen """ - name = "$\phi$" + name = "$\\phi$" ratio = (1 + math.sqrt(5)) / 2 @@ -62,7 +62,7 @@ class Pi(CotoDeRatio): - Pi: Original Name by Timothy Standen """ - name = "$\pi$" + name = "$\\pi$" ratio = math.pi diff --git a/axelrod/strategies/zero_determinant.py b/axelrod/strategies/zero_determinant.py index 89852ae20..88e6cd22e 100644 --- a/axelrod/strategies/zero_determinant.py +++ b/axelrod/strategies/zero_determinant.py @@ -12,7 +12,7 @@ class LRPlayer(MemoryOnePlayer): The parameter :math:`s` is called the slope and the parameter :math:`l` the baseline payoff. For extortionate strategies, the extortion factor - :math:`\chi` is the inverse of the slope :math:`s`. + :math:`\\chi` is the inverse of the slope :math:`s`. 
For the standard prisoner's dilemma where :math:`T > R > P > S` and :math:`R > (T + S) / 2 > P`, a pair :math:`(l, s)` is enforceable iff diff --git a/axelrod/tests/integration/test_matches.py b/axelrod/tests/integration/test_matches.py index d3912de69..720bbbb98 100644 --- a/axelrod/tests/integration/test_matches.py +++ b/axelrod/tests/integration/test_matches.py @@ -29,7 +29,7 @@ class TestMatchOutcomes(unittest.TestCase): ), turns=integers(min_value=1, max_value=20), ) - @settings(max_examples=5) + @settings(max_examples=5, deadline=None) def test_outcome_repeats(self, strategies, turns): """A test that if we repeat 3 matches with deterministic and well behaved strategies then we get the same result""" diff --git a/axelrod/tests/integration/test_tournament.py b/axelrod/tests/integration/test_tournament.py index 36329c511..b09337807 100644 --- a/axelrod/tests/integration/test_tournament.py +++ b/axelrod/tests/integration/test_tournament.py @@ -45,7 +45,7 @@ def setUpClass(cls): max_repetitions=4, ) ) - @settings(max_examples=1) + @settings(max_examples=1, deadline=None) def test_big_tournaments(self, tournament): """A test to check that tournament runs with a sample of non-cheating strategies.""" diff --git a/axelrod/tests/property.py b/axelrod/tests/property.py index 263d1749c..81f960706 100644 --- a/axelrod/tests/property.py +++ b/axelrod/tests/property.py @@ -363,7 +363,7 @@ def games(draw, prisoners_dilemma=True, max_value=100): if prisoners_dilemma: s_upper_bound = max_value - 4 # Ensures there is enough room - s = draw(integers(max_value=s_upper_bound)) + s = draw(integers(min_value=0, max_value=s_upper_bound)) t_lower_bound = s + 3 # Ensures there is enough room t = draw(integers(min_value=t_lower_bound, max_value=max_value)) diff --git a/axelrod/tests/strategies/test_attention.py b/axelrod/tests/strategies/test_attention.py new file mode 100644 index 000000000..30898eb40 --- /dev/null +++ b/axelrod/tests/strategies/test_attention.py @@ -0,0 +1,96 @@ +"""Tests for the Attention strategies.""" + +import unittest + +import torch + +import axelrod as axl +from axelrod.strategies.attention import ( + MEMORY_LENGTH, + GameState, + PlayerModel, + actions_to_game_state, + compute_features, +) + +from .test_player import TestPlayer + +C, D = axl.Action.C, axl.Action.D + + +class TestFeatureComputation(unittest.TestCase): + """Test the feature computation functionality.""" + + def test_compute_features(self): + """Test that features are computed correctly.""" + player = axl.MockPlayer(actions=[C, D, C, D]) + opponent = axl.MockPlayer(actions=[D, C, C, D]) + # Play the actions to populate history + match = axl.Match((player, opponent), turns=4) + match.play() + + features = compute_features(player, opponent) + + # Check the shape and type + self.assertIsInstance(features, torch.Tensor) + self.assertEqual(features.shape, (len(player.history) + 1,)) + + # Check specific values (CLS token and game states) + self.assertEqual(features[0].item(), 0) # CLS token + self.assertEqual(features[1].item(), GameState.DefectDefect) + self.assertEqual(features[2].item(), GameState.CooperateCooperate) + self.assertEqual(features[3].item(), GameState.DefectCooperate) + self.assertEqual(features[4].item(), GameState.CooperateDefect) + + def test_compute_features_right_pad(self): + """Test that features are computed correctly.""" + player = axl.MockPlayer(actions=[C, D, C, D]) + opponent = axl.MockPlayer(actions=[D, C, C, D]) + # Play the actions to populate history + match = axl.Match((player, opponent), 
turns=4) + match.play() + + features = compute_features(player, opponent, True) + + # Check the shape and type + self.assertIsInstance(features, torch.Tensor) + self.assertEqual(features.shape, (MEMORY_LENGTH + 1,)) + + # Check specific values (CLS token and game states) + self.assertEqual(features[0].item(), 0) # CLS token + self.assertEqual(features[1].item(), GameState.DefectDefect) + self.assertEqual(features[2].item(), GameState.CooperateCooperate) + self.assertEqual(features[3].item(), GameState.DefectCooperate) + self.assertEqual(features[4].item(), GameState.CooperateDefect) + + def test_actions_to_game_state(self): + """Test the mapping from actions to game states.""" + self.assertEqual( + actions_to_game_state(C, C), GameState.CooperateCooperate + ) + self.assertEqual(actions_to_game_state(C, D), GameState.CooperateDefect) + self.assertEqual(actions_to_game_state(D, C), GameState.DefectCooperate) + self.assertEqual(actions_to_game_state(D, D), GameState.DefectDefect) + + +class TestEvolvedAttention(TestPlayer): + name = "EvolvedAttention" + player = axl.EvolvedAttention + expected_classifier = { + "memory_depth": MEMORY_LENGTH, + "stochastic": False, + "makes_use_of": set(), + "long_run_time": True, + "inspects_source": False, + "manipulates_source": False, + "manipulates_state": False, + } + + def test_model_initialization(self): + """Test that the model is initialized correctly.""" + player = self.player() + self.assertIsInstance(player.model, PlayerModel) + + def test_versus_cooperator(self): + actions = [(C, C)] * 5 + self.versus_test(axl.Cooperator(), expected_actions=actions) diff --git a/axelrod/tests/strategies/test_mathematicalconstants.py b/axelrod/tests/strategies/test_mathematicalconstants.py index 64d5ec850..31a18a288 100644 --- a/axelrod/tests/strategies/test_mathematicalconstants.py +++ b/axelrod/tests/strategies/test_mathematicalconstants.py @@ -9,7 +9,7 @@ class TestGolden(TestPlayer): - name = "$\phi$" + name = "$\\phi$" player = axl.Golden expected_classifier = { "memory_depth": float("inf"), # Long memory @@ -34,7 +34,7 @@ def test_strategy(self): class TestPi(TestPlayer): - name = "$\pi$" + name = "$\\pi$" player = axl.Pi expected_classifier = { "memory_depth": float("inf"), # Long memory diff --git a/axelrod/tests/unit/test_classification.py b/axelrod/tests/unit/test_classification.py index f66243b1a..758cfb4c4 100644 --- a/axelrod/tests/unit/test_classification.py +++ b/axelrod/tests/unit/test_classification.py @@ -303,6 +303,7 @@ def test_inclusion_of_strategy_lists(self): def test_long_run_strategies(self): long_run_time_strategies = [ axl.DBS, + axl.EvolvedAttention, axl.MetaMajority, axl.MetaMajorityFiniteMemory, axl.MetaMajorityLongMemory, diff --git a/axelrod/tests/unit/test_load_data.py b/axelrod/tests/unit/test_load_data.py index 8273b1809..ddfde8a43 100644 --- a/axelrod/tests/unit/test_load_data.py +++ b/axelrod/tests/unit/test_load_data.py @@ -1,8 +1,15 @@ import os import pathlib import unittest +from unittest.mock import patch -from axelrod.load_data_ import axl_filename, load_file +import torch + +from axelrod.load_data_ import ( + axl_filename, + load_attention_model_weights, + load_file, +) class TestLoadData(unittest.TestCase): @@ -27,3 +34,29 @@ def test_raise_error_if_something(self): bad_loader = lambda _, __: None with self.assertRaises(FileNotFoundError): load_file(path, ".", bad_loader) + + def test_load_attention_model_weights(self): + """Test that the load_attention_model_weights function works correctly.""" + # Create a mock object 
to return + mock_weights = { + "layer1": torch.tensor([1.0, 2.0]), + "layer2": torch.tensor([3.0, 4.0]), + } + + # Patch torch.load to return our mock weights + with patch( + "axelrod.load_data_.torch.load", return_value=mock_weights + ) as mock_load: + # Call our function + result = load_attention_model_weights() + + # Check that torch.load was called once + mock_load.assert_called_once() + + # Check that the path passed to torch.load contains the expected components + args, kwargs = mock_load.call_args + self.assertIn("model_attention.pth", args[0]) + self.assertEqual(kwargs["map_location"], torch.device("cpu")) + + # Check that the function returned our mock weights + self.assertEqual(result, mock_weights) diff --git a/axelrod/tests/unit/test_property.py b/axelrod/tests/unit/test_property.py index 9e3265878..0ee18aced 100644 --- a/axelrod/tests/unit/test_property.py +++ b/axelrod/tests/unit/test_property.py @@ -26,7 +26,7 @@ def test_call(self): self.assertIsInstance(p(), axl.Player) @given(strategies=strategy_lists(min_size=1, max_size=50)) - @settings(max_examples=5) + @settings(max_examples=5, deadline=None) def test_decorator(self, strategies): self.assertIsInstance(strategies, list) self.assertGreaterEqual(len(strategies), 1) @@ -35,7 +35,7 @@ def test_decorator(self, strategies): self.assertIsInstance(strategy(), axl.Player) @given(strategies=strategy_lists(strategies=axl.basic_strategies)) - @settings(max_examples=5) + @settings(max_examples=5, deadline=None) def test_decorator_with_given_strategies(self, strategies): self.assertIsInstance(strategies, list) basic_player_names = [str(s()) for s in axl.basic_strategies] @@ -55,7 +55,7 @@ def test_call(self): self.assertIsInstance(match, axl.Match) @given(match=matches(min_turns=10, max_turns=50, min_noise=0, max_noise=1)) - @settings(max_examples=5) + @settings(max_examples=5, deadline=None) def test_decorator(self, match): self.assertIsInstance(match, axl.Match) self.assertGreaterEqual(len(match), 10) @@ -64,7 +64,7 @@ def test_decorator(self, match): self.assertLessEqual(match.noise, 1) @given(match=matches(min_turns=10, max_turns=50, min_noise=0, max_noise=0)) - @settings(max_examples=5) + @settings(max_examples=5, deadline=None) def test_decorator_with_no_noise(self, match): self.assertIsInstance(match, axl.Match) self.assertGreaterEqual(len(match), 10) @@ -88,7 +88,7 @@ def test_call(self): max_size=3, ) ) - @settings(max_examples=5) + @settings(max_examples=5, deadline=None) def test_decorator(self, tournament): self.assertIsInstance(tournament, axl.Tournament) self.assertLessEqual(tournament.turns, 50) @@ -99,7 +99,7 @@ def test_decorator(self, tournament): self.assertGreaterEqual(tournament.repetitions, 2) @given(tournament=tournaments(strategies=axl.basic_strategies, max_size=3)) - @settings(max_examples=5) + @settings(max_examples=5, deadline=None) def test_decorator_with_given_strategies(self, tournament): self.assertIsInstance(tournament, axl.Tournament) basic_player_names = [str(s()) for s in axl.basic_strategies] @@ -123,7 +123,7 @@ def test_call(self): max_size=3, ) ) - @settings(max_examples=5) + @settings(max_examples=5, deadline=None) def test_decorator(self, tournament): self.assertIsInstance(tournament, axl.Tournament) self.assertLessEqual(tournament.prob_end, 1) @@ -138,7 +138,7 @@ def test_decorator(self, tournament): strategies=axl.basic_strategies, max_size=3 ) ) - @settings(max_examples=5) + @settings(max_examples=5, deadline=None) def test_decorator_with_given_strategies(self, tournament): 
self.assertIsInstance(tournament, axl.Tournament) basic_player_names = [str(s()) for s in axl.basic_strategies] @@ -162,7 +162,7 @@ def test_call(self): max_size=3, ) ) - @settings(max_examples=5) + @settings(max_examples=5, deadline=None) def test_decorator(self, tournament): self.assertIsInstance(tournament, axl.Tournament) self.assertLessEqual(tournament.turns, 50) @@ -177,7 +177,7 @@ def test_decorator(self, tournament): strategies=axl.basic_strategies, max_size=3 ) ) - @settings(max_examples=5) + @settings(max_examples=5, deadline=None) def test_decorator_with_given_strategies(self, tournament): self.assertIsInstance(tournament, axl.Tournament) basic_player_names = [str(s()) for s in axl.basic_strategies] @@ -201,7 +201,7 @@ def test_call(self): max_size=3, ) ) - @settings(max_examples=5) + @settings(max_examples=5, deadline=None) def test_decorator(self, tournament): self.assertIsInstance(tournament, axl.Tournament) self.assertLessEqual(tournament.prob_end, 1) @@ -216,7 +216,7 @@ def test_decorator(self, tournament): strategies=axl.basic_strategies, max_size=3 ) ) - @settings(max_examples=5) + @settings(max_examples=5, deadline=None) def test_decorator_with_given_strategies(self, tournament): self.assertIsInstance(tournament, axl.Tournament) basic_player_names = [str(s()) for s in axl.basic_strategies] @@ -230,13 +230,13 @@ def test_call(self): self.assertIsInstance(game, axl.Game) @given(game=games()) - @settings(max_examples=5) + @settings(max_examples=5, deadline=None) def test_decorator(self, game): self.assertIsInstance(game, axl.Game) r, p, s, t = game.RPST() self.assertTrue((2 * r) > (t + s) and (t > r > p > s)) @given(game=games(prisoners_dilemma=False)) - @settings(max_examples=5) + @settings(max_examples=5, deadline=None) def test_decorator_unconstrained(self, game): self.assertIsInstance(game, axl.Game) diff --git a/axelrod/tournament.py b/axelrod/tournament.py index 7a8823779..7825c8064 100644 --- a/axelrod/tournament.py +++ b/axelrod/tournament.py @@ -3,7 +3,11 @@ import os import warnings from collections import defaultdict -from multiprocessing import Process, Queue, cpu_count +from multiprocessing import Process, Queue, cpu_count, set_start_method + +# This is necessary for the code to work on Linux +# torch multiprocessing is not compatible with the default 'fork' method +set_start_method("spawn", force=True) from tempfile import mkstemp from typing import List, Optional, Tuple diff --git a/docs/how-to/classify_strategies.rst b/docs/how-to/classify_strategies.rst index 8282f79f5..c529ebc67 100644 --- a/docs/how-to/classify_strategies.rst +++ b/docs/how-to/classify_strategies.rst @@ -110,7 +110,7 @@ Some strategies have been classified as having a particularly long run time:: ... } >>> strategies = axl.filtered_strategies(filterset) >>> len(strategies) - 18 + 19 Strategies that :code:`manipulate_source`, :code:`manipulate_state` and/or :code:`inspect_source` return :code:`False` for the diff --git a/docs/how-to/contributing/strategy/writing_the_new_strategy.rst b/docs/how-to/contributing/strategy/writing_the_new_strategy.rst index 02c8b7064..a242444df 100644 --- a/docs/how-to/contributing/strategy/writing_the_new_strategy.rst +++ b/docs/how-to/contributing/strategy/writing_the_new_strategy.rst @@ -83,10 +83,10 @@ strategy:: name = 'Tit For Tat' Note that this is mainly used in plots by :code:`matplotlib` so you can use -LaTeX if you want to. For example there is strategy with :math:`\pi` as a +LaTeX if you want to. 
For example there is strategy with :math:`\\pi` as a name:: - name = '$\pi$' + name = '$\\pi$' Following that you can add in the :code:`classifier` dictionary:: diff --git a/docs/index.rst b/docs/index.rst index 0fc7c8ff3..82b9f41b5 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -53,7 +53,7 @@ Count the number of available players:: >>> import axelrod as axl >>> len(axl.strategies) - 242 + 243 Create matches between two players:: diff --git a/docs/reference/strategy_index.rst b/docs/reference/strategy_index.rst index 1e570fac6..9764d3082 100644 --- a/docs/reference/strategy_index.rst +++ b/docs/reference/strategy_index.rst @@ -18,6 +18,8 @@ Here are the docstrings of all the strategies in the library. :members: .. automodule:: axelrod.strategies.appeaser :members: +.. automodule:: axelrod.strategies.attention + :members: .. automodule:: axelrod.strategies.averagecopier :members: .. automodule:: axelrod.strategies.axelrod_first diff --git a/docs/requirements.txt b/docs/requirements.txt index b2c933e0a..0f4be075a 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,3 +1,4 @@ docutils>=0.18.1 numpy==1.24.3 # numpy isn't mocked due to complex use in doctests mock>=5.1.0 +torch>=2.6.0 \ No newline at end of file diff --git a/docs/tutorials/running_axelrods_first_tournament/_static/running_axelrods_first_tournament/main.py b/docs/tutorials/running_axelrods_first_tournament/_static/running_axelrods_first_tournament/main.py index f4f826a30..a3c739223 100644 --- a/docs/tutorials/running_axelrods_first_tournament/_static/running_axelrods_first_tournament/main.py +++ b/docs/tutorials/running_axelrods_first_tournament/_static/running_axelrods_first_tournament/main.py @@ -2,9 +2,10 @@ Script to obtain plots for the running axelrod tournament tutorial. """ -import axelrod as axl import matplotlib.pyplot as plt +import axelrod as axl + first_tournament_participants_ordered_by_reported_rank = [ s() for s in axl.axelrod_first_strategies ] diff --git a/pyproject.toml b/pyproject.toml index 8aec1b8ac..40d963607 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -32,6 +32,7 @@ dependencies = [ "scipy>=1.3.3", "toolz>=0.8.2", "tqdm>=4.39.0", + "torch>=2.6.0", ] [project.optional-dependencies] diff --git a/run_mypy.py b/run_mypy.py index 9286bca9a..98f95be2c 100755 --- a/run_mypy.py +++ b/run_mypy.py @@ -20,6 +20,7 @@ "axelrod/strategies/ann.py", "axelrod/strategies/apavlov.py", "axelrod/strategies/appeaser.py", + "axelrod/strategies/attention.py", "axelrod/strategies/averagecopier.py", "axelrod/strategies/axelrod_first.py", "axelrod/strategies/axelrod_second.py", diff --git a/setup.py b/setup.py index 25014f364..ed16d6a98 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,7 @@ -from collections import defaultdict import os import pathlib +from collections import defaultdict + from setuptools import setup # Read in the requirements files. diff --git a/tox.ini b/tox.ini index 7afca6c9d..5df968b98 100644 --- a/tox.ini +++ b/tox.ini @@ -28,6 +28,7 @@ deps = isort black numpy==1.26.4 + torch==2.6.0 mypy types-setuptools commands = @@ -36,3 +37,4 @@ commands = python -m isort --check-only axelrod/. python run_mypy.py python run_strategy_indexer.py + \ No newline at end of file
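
A minimal usage sketch for the new EvolvedAttention strategy, following the versus-Cooperator test added in this patch; the five-turn match length and the Cooperator opponent are illustrative choices taken from that test::

    import axelrod as axl

    # Instantiation builds the transformer model and loads the pre-trained
    # weights shipped in axelrod/data/model_attention.pth; every turn then
    # runs a forward pass through the encoder, hence long_run_time=True in
    # the strategy's classifier.
    player = axl.EvolvedAttention()
    match = axl.Match((player, axl.Cooperator()), turns=5)
    match.play()  # expected to cooperate on every turn, as in test_versus_cooperator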