From 1191e0120d31190b696391ffc92b529b79936a23 Mon Sep 17 00:00:00 2001
From: cureprotocols <cureprotocols@protonmail.com>
Date: Sat, 29 Mar 2025 17:40:04 -0600
Subject: [PATCH 01/10] Rewrite Exponential Search with iterative binary
 search helper

---
 searches/exponential_search.py | 130 ++++++++-------------------------
 1 file changed, 32 insertions(+), 98 deletions(-)

diff --git a/searches/exponential_search.py b/searches/exponential_search.py
index ed09b14e101c..47e66e4390be 100644
--- a/searches/exponential_search.py
+++ b/searches/exponential_search.py
@@ -1,113 +1,47 @@
-#!/usr/bin/env python3
-
 """
-Pure Python implementation of exponential search algorithm
+Exponential Search Algorithm
 
-For more information, see the Wikipedia page:
-https://en.wikipedia.org/wiki/Exponential_search
+Time Complexity:
+- Best Case: O(1)
+- Average/Worst Case: O(log i), where i is the index of the first element >= target
 
-For doctests run the following command:
-python3 -m doctest -v exponential_search.py
+Use Case:
+Efficient for searching in sorted arrays where the target is near the beginning.
 
-For manual testing run:
-python3 exponential_search.py
+Author: Michael Alexander Montoya
 """
 
-from __future__ import annotations
-
-
-def binary_search_by_recursion(
-    sorted_collection: list[int], item: int, left: int = 0, right: int = -1
-) -> int:
-    """Pure implementation of binary search algorithm in Python using recursion
-
-    Be careful: the collection must be ascending sorted otherwise, the result will be
-    unpredictable.
-
-    :param sorted_collection: some ascending sorted collection with comparable items
-    :param item: item value to search
-    :param left: starting index for the search
-    :param right: ending index for the search
-    :return: index of the found item or -1 if the item is not found
-
-    Examples:
-    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 0, 0, 4)
-    0
-    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 15, 0, 4)
-    4
-    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 5, 0, 4)
-    1
-    >>> binary_search_by_recursion([0, 5, 7, 10, 15], 6, 0, 4)
-    -1
-    """
-    if right < 0:
-        right = len(sorted_collection) - 1
-    if list(sorted_collection) != sorted(sorted_collection):
-        raise ValueError("sorted_collection must be sorted in ascending order")
-    if right < left:
+def exponential_search(arr, target):
+    if len(arr) == 0:
         return -1
 
-    midpoint = left + (right - left) // 2
-
-    if sorted_collection[midpoint] == item:
-        return midpoint
-    elif sorted_collection[midpoint] > item:
-        return binary_search_by_recursion(sorted_collection, item, left, midpoint - 1)
-    else:
-        return binary_search_by_recursion(sorted_collection, item, midpoint + 1, right)
-
-
-def exponential_search(sorted_collection: list[int], item: int) -> int:
-    """
-    Pure implementation of an exponential search algorithm in Python.
-    For more information, refer to:
-    https://en.wikipedia.org/wiki/Exponential_search
-
-    Be careful: the collection must be ascending sorted, otherwise the result will be
-    unpredictable.
-
-    :param sorted_collection: some ascending sorted collection with comparable items
-    :param item: item value to search
-    :return: index of the found item or -1 if the item is not found
-
-    The time complexity of this algorithm is O(log i) where i is the index of the item.
-
-    Examples:
-    >>> exponential_search([0, 5, 7, 10, 15], 0)
-    0
-    >>> exponential_search([0, 5, 7, 10, 15], 15)
-    4
-    >>> exponential_search([0, 5, 7, 10, 15], 5)
-    1
-    >>> exponential_search([0, 5, 7, 10, 15], 6)
-    -1
-    """
-    if list(sorted_collection) != sorted(sorted_collection):
-        raise ValueError("sorted_collection must be sorted in ascending order")
-
-    if sorted_collection[0] == item:
+    if arr[0] == target:
         return 0
 
-    bound = 1
-    while bound < len(sorted_collection) and sorted_collection[bound] < item:
-        bound *= 2
+    # Find range for binary search by repeated doubling
+    index = 1
+    while index < len(arr) and arr[index] <= target:
+        index *= 2
 
-    left = bound // 2
-    right = min(bound, len(sorted_collection) - 1)
-    return binary_search_by_recursion(sorted_collection, item, left, right)
+    # Perform binary search in the found range
+    return binary_search(arr, target, index // 2, min(index, len(arr)-1))
 
 
-if __name__ == "__main__":
-    import doctest
+def binary_search(arr, target, left, right):
+    while left <= right:
+        mid = (left + right) // 2
+        if arr[mid] == target:
+            return mid
+        elif arr[mid] < target:
+            left = mid + 1
+        else:
+            right = mid - 1
+    return -1
 
-    doctest.testmod()
 
-    # Manual testing
-    user_input = input("Enter numbers separated by commas: ").strip()
-    collection = sorted(int(item) for item in user_input.split(","))
-    target = int(input("Enter a number to search for: "))
-    result = exponential_search(sorted_collection=collection, item=target)
-    if result == -1:
-        print(f"{target} was not found in {collection}.")
-    else:
-        print(f"{target} was found at index {result} in {collection}.")
+# Example usage:
+if __name__ == "__main__":
+    array = [1, 3, 5, 7, 9, 13, 17, 21, 24, 27, 30]
+    target = 13
+    result = exponential_search(array, target)
+    print(f"Target {target} found at index: {result}")

From 213ff5510084ea6537fcfc0f66e741070a27398b Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sat, 29 Mar 2025 23:46:14 +0000
Subject: [PATCH 02/10] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 searches/exponential_search.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/searches/exponential_search.py b/searches/exponential_search.py
index 47e66e4390be..79e3e80ed059 100644
--- a/searches/exponential_search.py
+++ b/searches/exponential_search.py
@@ -11,6 +11,7 @@
 Author: Michael Alexander Montoya
 """
 
+
 def exponential_search(arr, target):
     if len(arr) == 0:
         return -1
@@ -24,7 +25,7 @@ def exponential_search(arr, target):
         index *= 2
 
     # Perform binary search in the found range
-    return binary_search(arr, target, index // 2, min(index, len(arr)-1))
+    return binary_search(arr, target, index // 2, min(index, len(arr) - 1))
 
 
 def binary_search(arr, target, left, right):

From 8ed3e474742af652534f9d1d941156f84d38ac9b Mon Sep 17 00:00:00 2001
From: cureprotocols <cureprotocols@protonmail.com>
Date: Sat, 29 Mar 2025 18:11:10 -0600
Subject: [PATCH 03/10] Add Reservoir Sampling algorithm for streaming data

---
 searches/reservoir_sampling.py | 48 ++++++++++++++++++++++++++++++++++
 1 file changed, 48 insertions(+)
 create mode 100644 searches/reservoir_sampling.py

diff --git a/searches/reservoir_sampling.py b/searches/reservoir_sampling.py
new file mode 100644
index 000000000000..74c3ac272069
--- /dev/null
+++ b/searches/reservoir_sampling.py
@@ -0,0 +1,48 @@
+"""
+Reservoir Sampling Algorithm
+
+Use Case:
+Efficient for selecting k random items from a data stream of unknown size,
+or when the entire dataset cannot fit into memory.
+
+Time Complexity:
+- O(n), where n is the total number of items
+- Space Complexity: O(k)
+
+Author: Michael Alexander Montoya
+"""
+
+import random
+
+def reservoir_sampling(stream, k):
+    """
+    Performs reservoir sampling on a stream of items.
+
+    Args:
+        stream: An iterable data stream.
+        k: Number of items to sample.
+
+    Returns:
+        A list containing k randomly sampled items from the stream.
+    """
+
+    reservoir = []
+
+    for i, item in enumerate(stream):
+        if i < k:
+            reservoir.append(item)
+        else:
+            j = random.randint(0, i)
+            if j < k:
+                reservoir[j] = item
+
+    return reservoir
+
+
+# Example usage
+if __name__ == "__main__":
+    stream_data = range(1, 1001)  # Simulate a stream of numbers from 1 to 1000
+    sample_size = 10
+
+    sample = reservoir_sampling(stream_data, sample_size)
+    print(f"Random sample of {sample_size} items from stream: {sample}")

From e2900a0379a42492d58b8837ef6af29858594a1a Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 30 Mar 2025 00:12:44 +0000
Subject: [PATCH 04/10] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 searches/reservoir_sampling.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/searches/reservoir_sampling.py b/searches/reservoir_sampling.py
index 74c3ac272069..807fc03fd84c 100644
--- a/searches/reservoir_sampling.py
+++ b/searches/reservoir_sampling.py
@@ -14,6 +14,7 @@
 
 import random
 
+
 def reservoir_sampling(stream, k):
     """
     Performs reservoir sampling on a stream of items.

From 65a42bb7e768f4add8e068220f712be0317a349a Mon Sep 17 00:00:00 2001
From: cureprotocols <cureprotocols@protonmail.com>
Date: Sat, 29 Mar 2025 18:23:31 -0600
Subject: [PATCH 05/10] Add Union-Find (Disjoint Set) with path compression

---
 data_structures/disjoint_set/union_find.py | 56 ++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 data_structures/disjoint_set/union_find.py

diff --git a/data_structures/disjoint_set/union_find.py b/data_structures/disjoint_set/union_find.py
new file mode 100644
index 000000000000..c76c4783e405
--- /dev/null
+++ b/data_structures/disjoint_set/union_find.py
@@ -0,0 +1,56 @@
+"""
+Union-Find (Disjoint Set Union) with Path Compression and Union by Rank
+
+Use Case:
+- Efficient structure to manage disjoint sets
+- Useful in network connectivity, Kruskal's MST, and clustering
+
+Time Complexity:
+- O(α(n)) amortized per operation, where α is the inverse Ackermann function
+
+Author: Michael Alexander Montoya
+"""
+
+class UnionFind:
+    def __init__(self, size):
+        self.parent = list(range(size))
+        self.rank = [0] * size
+
+    def find(self, node):
+        if self.parent[node] != node:
+            self.parent[node] = self.find(self.parent[node])  # Path compression
+        return self.parent[node]
+
+    def union(self, x, y):
+        rootX = self.find(x)
+        rootY = self.find(y)
+
+        if rootX == rootY:
+            return False  # Already connected
+
+        # Union by rank
+        if self.rank[rootX] < self.rank[rootY]:
+            self.parent[rootX] = rootY
+        elif self.rank[rootX] > self.rank[rootY]:
+            self.parent[rootY] = rootX
+        else:
+            self.parent[rootY] = rootX
+            self.rank[rootX] += 1
+
+        return True
+
+
+# Example usage
+if __name__ == "__main__":
+    uf = UnionFind(10)
+
+    uf.union(1, 2)
+    uf.union(2, 3)
+    uf.union(4, 5)
+
+    print("1 and 3 connected:", uf.find(1) == uf.find(3))  # True
+    print("1 and 5 connected:", uf.find(1) == uf.find(5))  # False
+
+    uf.union(3, 5)
+
+    print("1 and 5 connected after union:", uf.find(1) == uf.find(5))  # True

From be82ae40301e9bedbfb27a55436ad04bf918c347 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 30 Mar 2025 00:24:49 +0000
Subject: [PATCH 06/10] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 data_structures/disjoint_set/union_find.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/data_structures/disjoint_set/union_find.py b/data_structures/disjoint_set/union_find.py
index c76c4783e405..abb621a8824b 100644
--- a/data_structures/disjoint_set/union_find.py
+++ b/data_structures/disjoint_set/union_find.py
@@ -11,6 +11,7 @@
 Author: Michael Alexander Montoya
 """
 
+
 class UnionFind:
     def __init__(self, size):
         self.parent = list(range(size))

From 563681b02710ec6f35c8bb18f573a318d46c852a Mon Sep 17 00:00:00 2001
From: cureprotocols <cureprotocols@protonmail.com>
Date: Sat, 29 Mar 2025 18:31:09 -0600
Subject: [PATCH 07/10] Refactor: add type hints, doctests, and naming fixes
 for bot compliance

---
 data_structures/disjoint_set/union_find.py | 56 ++++++++++++++++------
 searches/reservoir_sampling.py             | 31 +++++++-----
 2 files changed, 59 insertions(+), 28 deletions(-)

diff --git a/data_structures/disjoint_set/union_find.py b/data_structures/disjoint_set/union_find.py
index abb621a8824b..da97ed12bd12 100644
--- a/data_structures/disjoint_set/union_find.py
+++ b/data_structures/disjoint_set/union_find.py
@@ -13,38 +13,64 @@
 
 
 class UnionFind:
-    def __init__(self, size):
+    def __init__(self, size: int) -> None:
+        """
+        Initializes a Union-Find data structure with `size` elements.
+
+        >>> uf = UnionFind(5)
+        >>> uf.find(0)
+        0
+        """
         self.parent = list(range(size))
         self.rank = [0] * size
 
-    def find(self, node):
+    def find(self, node: int) -> int:
+        """
+        Finds the representative/root of the set that `node` belongs to.
+
+        >>> uf = UnionFind(5)
+        >>> uf.find(3)
+        3
+        """
         if self.parent[node] != node:
             self.parent[node] = self.find(self.parent[node])  # Path compression
         return self.parent[node]
 
-    def union(self, x, y):
-        rootX = self.find(x)
-        rootY = self.find(y)
-
-        if rootX == rootY:
+    def union(self, a: int, b: int) -> bool:
+        """
+        Unites the sets that contain elements `a` and `b`.
+
+        >>> uf = UnionFind(5)
+        >>> uf.union(0, 1)
+        True
+        >>> uf.find(1) == uf.find(0)
+        True
+        >>> uf.union(0, 1)
+        False
+        """
+        root_a = self.find(a)
+        root_b = self.find(b)
+
+        if root_a == root_b:
             return False  # Already connected
 
         # Union by rank
-        if self.rank[rootX] < self.rank[rootY]:
-            self.parent[rootX] = rootY
-        elif self.rank[rootX] > self.rank[rootY]:
-            self.parent[rootY] = rootX
+        if self.rank[root_a] < self.rank[root_b]:
+            self.parent[root_a] = root_b
+        elif self.rank[root_a] > self.rank[root_b]:
+            self.parent[root_b] = root_a
         else:
-            self.parent[rootY] = rootX
-            self.rank[rootX] += 1
+            self.parent[root_b] = root_a
+            self.rank[root_a] += 1
 
         return True
 
 
-# Example usage
 if __name__ == "__main__":
-    uf = UnionFind(10)
+    import doctest
+    doctest.testmod()
 
+    uf = UnionFind(10)
     uf.union(1, 2)
     uf.union(2, 3)
     uf.union(4, 5)
diff --git a/searches/reservoir_sampling.py b/searches/reservoir_sampling.py
index 807fc03fd84c..e6e6d2c2b0f8 100644
--- a/searches/reservoir_sampling.py
+++ b/searches/reservoir_sampling.py
@@ -2,48 +2,53 @@
 Reservoir Sampling Algorithm
 
 Use Case:
-Efficient for selecting k random items from a data stream of unknown size,
+Efficient for selecting `sample_size` random items from a data stream of unknown size,
 or when the entire dataset cannot fit into memory.
 
 Time Complexity:
 - O(n), where n is the total number of items
-- Space Complexity: O(k)
+- Space Complexity: O(sample_size)
 
 Author: Michael Alexander Montoya
 """
 
 import random
+from typing import Iterable
 
 
-def reservoir_sampling(stream, k):
+def reservoir_sampling(stream: Iterable[int], sample_size: int) -> list[int]:
     """
     Performs reservoir sampling on a stream of items.
 
     Args:
         stream: An iterable data stream.
-        k: Number of items to sample.
+        sample_size: Number of items to sample.
 
     Returns:
-        A list containing k randomly sampled items from the stream.
-    """
+        A list of up to `sample_size` items sampled uniformly from the stream.
 
+    >>> result = reservoir_sampling(range(1, 1001), 10)
+    >>> len(result) == 10
+    True
+    """
     reservoir = []
 
     for i, item in enumerate(stream):
-        if i < k:
+        if i < sample_size:
             reservoir.append(item)
         else:
             j = random.randint(0, i)
-            if j < k:
+            if j < sample_size:
                 reservoir[j] = item
 
     return reservoir
 
 
-# Example usage
 if __name__ == "__main__":
-    stream_data = range(1, 1001)  # Simulate a stream of numbers from 1 to 1000
-    sample_size = 10
+    import doctest
+
+    doctest.testmod()
 
-    sample = reservoir_sampling(stream_data, sample_size)
-    print(f"Random sample of {sample_size} items from stream: {sample}")
+    stream_data = range(1, 1001)
+    sample = reservoir_sampling(stream_data, 10)
+    print(f"Sampled items: {sample}")

From 8991be09fc6c34f32db120c89012600481f595dc Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 30 Mar 2025 00:32:21 +0000
Subject: [PATCH 08/10] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 data_structures/disjoint_set/union_find.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/data_structures/disjoint_set/union_find.py b/data_structures/disjoint_set/union_find.py
index da97ed12bd12..3e258a19f5f4 100644
--- a/data_structures/disjoint_set/union_find.py
+++ b/data_structures/disjoint_set/union_find.py
@@ -68,6 +68,7 @@ def union(self, a: int, b: int) -> bool:
 
 if __name__ == "__main__":
     import doctest
+
     doctest.testmod()
 
     uf = UnionFind(10)

From 9563b1cf16336c33dd036dcc9d74d067d9e5276c Mon Sep 17 00:00:00 2001
From: cureprotocols <cureprotocols@protonmail.com>
Date: Sat, 29 Mar 2025 18:36:24 -0600
Subject: [PATCH 09/10] Refactor: renamed union() parameters for descriptive
 clarity

---
 data_structures/disjoint_set/union_find.py | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/data_structures/disjoint_set/union_find.py b/data_structures/disjoint_set/union_find.py
index 3e258a19f5f4..fed0026badae 100644
--- a/data_structures/disjoint_set/union_find.py
+++ b/data_structures/disjoint_set/union_find.py
@@ -11,7 +11,6 @@
 Author: Michael Alexander Montoya
 """
 
-
 class UnionFind:
     def __init__(self, size: int) -> None:
         """
@@ -36,9 +35,9 @@ def find(self, node: int) -> int:
             self.parent[node] = self.find(self.parent[node])  # Path compression
         return self.parent[node]
 
-    def union(self, a: int, b: int) -> bool:
+    def union(self, node_a: int, node_b: int) -> bool:
         """
-        Unites the sets that contain elements `a` and `b`.
+        Unites the sets that contain elements `node_a` and `node_b`.
 
         >>> uf = UnionFind(5)
         >>> uf.union(0, 1)
@@ -48,13 +47,12 @@ def union(self, a: int, b: int) -> bool:
         >>> uf.union(0, 1)
         False
         """
-        root_a = self.find(a)
-        root_b = self.find(b)
+        root_a = self.find(node_a)
+        root_b = self.find(node_b)
 
         if root_a == root_b:
             return False  # Already connected
 
-        # Union by rank
         if self.rank[root_a] < self.rank[root_b]:
             self.parent[root_a] = root_b
         elif self.rank[root_a] > self.rank[root_b]:

From 8be9d1add38e07132c1fd781cd7e0cca7f5afb62 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]"
 <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 30 Mar 2025 00:37:50 +0000
Subject: [PATCH 10/10] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 data_structures/disjoint_set/union_find.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/data_structures/disjoint_set/union_find.py b/data_structures/disjoint_set/union_find.py
index fed0026badae..12e2c83fe942 100644
--- a/data_structures/disjoint_set/union_find.py
+++ b/data_structures/disjoint_set/union_find.py
@@ -11,6 +11,7 @@
 Author: Michael Alexander Montoya
 """
 
+
 class UnionFind:
     def __init__(self, size: int) -> None:
         """