Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -93,8 +93,8 @@ python ./demo/demo_mlx.py \
--hardware ./demo/hardware/c4dm-disklavier.json \
--midi_control_signal 67 \
--midi_reset_control_signal 66 \
--temp 0.9 \
--min_p 0.03
--temp 0.85 \
--min_p 0.05
```

A MIDI input device is not strictly required to play around with the demo: by using the `--midi_path` and `--midi_through` arguments, you can mock real-time input by playing from a MIDI file. All you need are MIDI drivers (e.g., CoreMIDI) and a virtual software instrument (e.g., FluidSynth, Pianoteq) to render the output. In this mode, you can initiate the model takeover by pressing the Enter key.
Expand All @@ -107,7 +107,7 @@ python ./demo/demo_mlx.py \
--midi_path ${MIDI_PATH} \
--midi_through <midi-playback-port> \
--midi_out <midi-playback-port> \
--temp 0.9 \
--temp 0.85 \
--min_p 0.05
```

Expand Down
5 changes: 2 additions & 3 deletions aria/inference/model_cuda.py
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ def _att_block(
freqs_cis: torch.Tensor,
mask: torch.Tensor,
):

q, k, v = self.mixed_qkv(x).split(
[self.d_model, self.d_model, self.d_model], dim=-1
)
Expand Down Expand Up @@ -166,7 +165,7 @@ def fill_condition_kv(self, emb: torch.Tensor):
assert self.model_config.emb_size is not None

input_pos = torch.tensor([0], device=emb.device)
mask = self.causal_mask[None, None, input_pos]
mask = self.causal_mask[input_pos].unsqueeze(0).unsqueeze(0)
freqs_cis = self.freqs_cis[input_pos]

x = emb.unsqueeze(dim=1)
Expand All @@ -182,7 +181,7 @@ def forward(
):
assert self.freqs_cis is not None, "Caches must be initialized first"

mask = self.causal_mask[None, None, input_pos]
mask = self.causal_mask[input_pos].unsqueeze(0).unsqueeze(0)

if pad_idxs is not None:
mask = mask & ~(pad_idxs.unsqueeze(1).unsqueeze(1))
Expand Down
2 changes: 1 addition & 1 deletion demo/demo_mlx.py
Original file line number Diff line number Diff line change
Expand Up @@ -696,7 +696,7 @@ def decode_tokens(
logits[:, tokenizer.tok_to_id[tokenizer.eos_tok]] = float("-inf")

if temperature > 0.0:
next_token_ids = sample_min_p(logits, min_p).flatten()
next_token_ids = sample_min_p(logits / temperature, min_p).flatten()
else:
next_token_ids = mx.argmax(logits, axis=-1).flatten()

Expand Down
Binary file modified example-prompts/smooth_jazz.mid
Binary file not shown.