Skip to content

Commit

Permalink
A little nicer
Browse files Browse the repository at this point in the history
  • Loading branch information
andrewdalpino committed Oct 13, 2024
1 parent 9599487 commit efec696
Show file tree
Hide file tree
Showing 5 changed files with 20 additions and 15 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
__pycache__/
.mypy_cache/
env/
build/
develop-eggs/
Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,11 +8,12 @@ version = "0.0.1"
requires-python = ">= 3.10"
dependencies = [
"numpy>=1.19.5",
"nptyping>=2.5.0",
]
authors = [
{name = "Andrew DalPino", email = "[email protected]"},
]
description = "PyBloomer is an Python implementation of the OkBloomer algorithm, an autoscaling Bloom filter with ultra-low memory footprint."
description = "A Python implementation of the OkBloomer algorithm, an autoscaling Bloom filter with ultra-low memory footprint."
readme = "README.md"
license = {text = "MIT"}

Expand Down
File renamed without changes.
12 changes: 7 additions & 5 deletions src/pybloomer/bloom_filter.py → src/okbloomer/bloom_filter.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
from nptyping import NDArray

class BloomFilter(object):
"""
Expand All @@ -8,7 +9,8 @@ class BloomFilter(object):

MAX_SLICE_SIZE = 2147483647

n = 0 # The number of bits currently stored in the filter
n = 0 # The number of bits currently stored in the filter.
m = 0 # The maximum number of bits that can be stored in the filter.

def __init__(self,
max_false_positive_rate: float = 0.01,
Expand All @@ -33,8 +35,9 @@ def __init__(self,
self.num_hashes = num_hashes
self.layer_size = layer_size
self.slice_size = slice_size
self.layers = [np.zeros(layer_size, dtype='bool')]
self.m = layer_size
self.layers: list[NDArray] = []

self._add_layer()

@property
def num_layers(self) -> int:
Expand Down Expand Up @@ -127,8 +130,7 @@ def exists_or_insert(self, token: str) -> bool:

def _add_layer(self) -> None:
"""
Add another layer to the filter for maintaining the false positivity rate
below the threshold.
Add another layer to the filter for maintaining the false positivity rate below the threshold.
"""
self.layers.append(np.zeros(self.layer_size, dtype='bool'))

Expand Down
19 changes: 10 additions & 9 deletions tests/test_bloom_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,11 @@
import random
import string

import pybloomer
import okbloomer

class TestBloomFilter(unittest.TestCase):
def test_basic(self):
filter = pybloomer.BloomFilter()
filter = okbloomer.BloomFilter()

self.assertEqual(filter.false_positive_rate, 0)

Expand All @@ -27,7 +27,7 @@ def test_basic(self):
self.assertFalse(filter.exists('baz'))

def test_exists_or_insert(self):
filter = pybloomer.BloomFilter()
filter = okbloomer.BloomFilter()

self.assertFalse(filter.exists_or_insert('foo'))

Expand All @@ -42,10 +42,10 @@ def test_exists_or_insert(self):
self.assertTrue(filter.exists_or_insert('baz'))

def test_autoscaling(self):
random.seed(0)
random.seed(1)

filter = pybloomer.BloomFilter(
max_false_positive_rate=0.01,
filter = okbloomer.BloomFilter(
max_false_positive_rate=0.001,
num_hashes=4,
layer_size=320000,
)
Expand All @@ -59,10 +59,11 @@ def test_autoscaling(self):

filter.insert('bar')

self.assertEqual(filter.num_layers, 3)
self.assertLessEqual(filter.false_positive_rate, 0.01)
self.assertEqual(filter.num_layers, 6)
self.assertLessEqual(filter.false_positive_rate, 0.001)
self.assertLessEqual(filter.utilization, 1.0)
self.assertGreater(filter.capacity, 0.0)

self.assertTrue(filter.exists('foo'))
self.assertTrue(filter.exists('bar'))
self.assertFalse(filter.exists('father'))
self.assertFalse(filter.exists('baz'))

0 comments on commit efec696

Please sign in to comment.