From 36d1b7e7e040f14c65a80f3e18fc5a43f0d33ce2 Mon Sep 17 00:00:00 2001 From: EthanReid Date: Wed, 26 Feb 2025 01:47:33 +0000 Subject: [PATCH 1/2] log --- moondream/torch/moondream.py | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/moondream/torch/moondream.py b/moondream/torch/moondream.py index 6baf2d1b..11d44d63 100644 --- a/moondream/torch/moondream.py +++ b/moondream/torch/moondream.py @@ -369,6 +369,7 @@ def _generate_points( next_token.item() != self.config.tokenizer.eos_id and len(out) < max_points ): + x_logits = decode_coordinate(hidden, self.region) x_center = torch.argmax(x_logits, dim=-1) / x_logits.size(-1) next_emb = encode_coordinate( @@ -390,9 +391,17 @@ def _generate_points( mask[:, :, pos], pos_ids[0] = 1, pos logits, hidden = self._decode_one_tok(next_emb, mask, pos_ids) pos += 1 + size_logits = decode_size(hidden, self.region) - w = torch.argmax(size_logits[0], dim=-1) / size_logits.size(-1) - h = torch.argmax(size_logits[1], dim=-1) / size_logits.size(-1) + + w_bin = torch.argmax(size_logits[0], dim=-1).float() + w_log2 = w_bin / 1023.0 * 10.0 - 10.0 + w = 2.0**w_log2 + + h_bin = torch.argmax(size_logits[1], dim=-1).float() + h_log2 = h_bin / 1023.0 * 10.0 - 10.0 + h = 2.0**h_log2 + next_emb = encode_size( torch.tensor( [w, h], device=self.device, dtype=size_logits.dtype @@ -445,7 +454,7 @@ def detect( hidden = hidden[:, -1:, :] objects = self._generate_points( - hidden, next_token, pos, include_size=True, max_points=50 + hidden, next_token, pos, include_size=True, max_points=500 ) return {"objects": objects} From d5f1c18f6c7bf2a8f3b125fde5aee8df9a7c71a7 Mon Sep 17 00:00:00 2001 From: EthanReid Date: Thu, 27 Mar 2025 03:31:16 -0400 Subject: [PATCH 2/2] 500 to 50 points --- moondream/torch/moondream.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/moondream/torch/moondream.py b/moondream/torch/moondream.py index 11d44d63..8960192c 100644 --- a/moondream/torch/moondream.py +++ b/moondream/torch/moondream.py @@ -454,7 +454,7 @@ def detect( hidden = hidden[:, -1:, :] objects = self._generate_points( - hidden, next_token, pos, include_size=True, max_points=500 + hidden, next_token, pos, include_size=True, max_points=50 ) return {"objects": objects}