From efec69688fa802ba102f622aaec56293805837bd Mon Sep 17 00:00:00 2001 From: Andrew DalPino Date: Sat, 12 Oct 2024 19:45:29 -0500 Subject: [PATCH] A little nicer --- .gitignore | 1 + pyproject.toml | 3 ++- src/{pybloomer => okbloomer}/__init__.py | 0 src/{pybloomer => okbloomer}/bloom_filter.py | 12 +++++++----- tests/test_bloom_filter.py | 19 ++++++++++--------- 5 files changed, 20 insertions(+), 15 deletions(-) rename src/{pybloomer => okbloomer}/__init__.py (100%) rename src/{pybloomer => okbloomer}/bloom_filter.py (93%) diff --git a/.gitignore b/.gitignore index 83a0944..92bfc7e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,5 @@ __pycache__/ +.mypy_cache/ env/ build/ develop-eggs/ diff --git a/pyproject.toml b/pyproject.toml index 222dea4..a1cc2b8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,11 +8,12 @@ version = "0.0.1" requires-python = ">= 3.10" dependencies = [ "numpy>=1.19.5", + "nptyping>=2.5.0", ] authors = [ {name = "Andrew DalPino", email = "support@andrewdalpino.com"}, ] -description = "PyBloomer is an Python implementation of the OkBloomer algorithm, an autoscaling Bloom filter with ultra-low memory footprint." +description = "A Python implementation of the OkBloomer algorithm, an autoscaling Bloom filter with ultra-low memory footprint." readme = "README.md" license = {text = "MIT"} diff --git a/src/pybloomer/__init__.py b/src/okbloomer/__init__.py similarity index 100% rename from src/pybloomer/__init__.py rename to src/okbloomer/__init__.py diff --git a/src/pybloomer/bloom_filter.py b/src/okbloomer/bloom_filter.py similarity index 93% rename from src/pybloomer/bloom_filter.py rename to src/okbloomer/bloom_filter.py index dcd1136..7d62426 100644 --- a/src/pybloomer/bloom_filter.py +++ b/src/okbloomer/bloom_filter.py @@ -1,4 +1,5 @@ import numpy as np +from nptyping import NDArray class BloomFilter(object): """ @@ -8,7 +9,8 @@ class BloomFilter(object): MAX_SLICE_SIZE = 2147483647 - n = 0 # The number of bits currently stored in the filter + n = 0 # The number of bits currently stored in the filter. + m = 0 # The maximum number of bits that can be stored in the filter. def __init__(self, max_false_positive_rate: float = 0.01, @@ -33,8 +35,9 @@ def __init__(self, self.num_hashes = num_hashes self.layer_size = layer_size self.slice_size = slice_size - self.layers = [np.zeros(layer_size, dtype='bool')] - self.m = layer_size + self.layers: list[NDArray] = [] + + self._add_layer() @property def num_layers(self) -> int: @@ -127,8 +130,7 @@ def exists_or_insert(self, token: str) -> bool: def _add_layer(self) -> None: """ - Add another layer to the filter for maintaining the false positivity rate - below the threshold. + Add another layer to the filter for maintaining the false positivity rate below the threshold. """ self.layers.append(np.zeros(self.layer_size, dtype='bool')) diff --git a/tests/test_bloom_filter.py b/tests/test_bloom_filter.py index 920cf92..fdf8941 100644 --- a/tests/test_bloom_filter.py +++ b/tests/test_bloom_filter.py @@ -2,11 +2,11 @@ import random import string -import pybloomer +import okbloomer class TestBloomFilter(unittest.TestCase): def test_basic(self): - filter = pybloomer.BloomFilter() + filter = okbloomer.BloomFilter() self.assertEqual(filter.false_positive_rate, 0) @@ -27,7 +27,7 @@ def test_basic(self): self.assertFalse(filter.exists('baz')) def test_exists_or_insert(self): - filter = pybloomer.BloomFilter() + filter = okbloomer.BloomFilter() self.assertFalse(filter.exists_or_insert('foo')) @@ -42,10 +42,10 @@ def test_exists_or_insert(self): self.assertTrue(filter.exists_or_insert('baz')) def test_autoscaling(self): - random.seed(0) + random.seed(1) - filter = pybloomer.BloomFilter( - max_false_positive_rate=0.01, + filter = okbloomer.BloomFilter( + max_false_positive_rate=0.001, num_hashes=4, layer_size=320000, ) @@ -59,10 +59,11 @@ def test_autoscaling(self): filter.insert('bar') - self.assertEqual(filter.num_layers, 3) - self.assertLessEqual(filter.false_positive_rate, 0.01) + self.assertEqual(filter.num_layers, 6) + self.assertLessEqual(filter.false_positive_rate, 0.001) self.assertLessEqual(filter.utilization, 1.0) + self.assertGreater(filter.capacity, 0.0) self.assertTrue(filter.exists('foo')) self.assertTrue(filter.exists('bar')) - self.assertFalse(filter.exists('father')) + self.assertFalse(filter.exists('baz'))