From 1191e0120d31190b696391ffc92b529b79936a23 Mon Sep 17 00:00:00 2001 From: cureprotocols Date: Sat, 29 Mar 2025 17:40:04 -0600 Subject: [PATCH 01/10] Add Exponential Search algorithm with binary search fallback --- searches/exponential_search.py | 130 ++++++++------------------------- 1 file changed, 32 insertions(+), 98 deletions(-) diff --git a/searches/exponential_search.py b/searches/exponential_search.py index ed09b14e101c..47e66e4390be 100644 --- a/searches/exponential_search.py +++ b/searches/exponential_search.py @@ -1,113 +1,47 @@ -#!/usr/bin/env python3 - """ -Pure Python implementation of exponential search algorithm +Exponential Search Algorithm -For more information, see the Wikipedia page: -https://en.wikipedia.org/wiki/Exponential_search +Time Complexity: +- Best Case: O(1) +- Average/Worst Case: O(log i), where i is the index of the first element >= target -For doctests run the following command: -python3 -m doctest -v exponential_search.py +Use Case: +Efficient for searching in sorted arrays where the target is near the beginning. -For manual testing run: -python3 exponential_search.py +Author: Michael Alexander Montoya """ -from __future__ import annotations - - -def binary_search_by_recursion( - sorted_collection: list[int], item: int, left: int = 0, right: int = -1 -) -> int: - """Pure implementation of binary search algorithm in Python using recursion - - Be careful: the collection must be ascending sorted otherwise, the result will be - unpredictable. - - :param sorted_collection: some ascending sorted collection with comparable items - :param item: item value to search - :param left: starting index for the search - :param right: ending index for the search - :return: index of the found item or -1 if the item is not found - - Examples: - >>> binary_search_by_recursion([0, 5, 7, 10, 15], 0, 0, 4) - 0 - >>> binary_search_by_recursion([0, 5, 7, 10, 15], 15, 0, 4) - 4 - >>> binary_search_by_recursion([0, 5, 7, 10, 15], 5, 0, 4) - 1 - >>> binary_search_by_recursion([0, 5, 7, 10, 15], 6, 0, 4) - -1 - """ - if right < 0: - right = len(sorted_collection) - 1 - if list(sorted_collection) != sorted(sorted_collection): - raise ValueError("sorted_collection must be sorted in ascending order") - if right < left: +def exponential_search(arr, target): + if len(arr) == 0: return -1 - midpoint = left + (right - left) // 2 - - if sorted_collection[midpoint] == item: - return midpoint - elif sorted_collection[midpoint] > item: - return binary_search_by_recursion(sorted_collection, item, left, midpoint - 1) - else: - return binary_search_by_recursion(sorted_collection, item, midpoint + 1, right) - - -def exponential_search(sorted_collection: list[int], item: int) -> int: - """ - Pure implementation of an exponential search algorithm in Python. - For more information, refer to: - https://en.wikipedia.org/wiki/Exponential_search - - Be careful: the collection must be ascending sorted, otherwise the result will be - unpredictable. - - :param sorted_collection: some ascending sorted collection with comparable items - :param item: item value to search - :return: index of the found item or -1 if the item is not found - - The time complexity of this algorithm is O(log i) where i is the index of the item. - - Examples: - >>> exponential_search([0, 5, 7, 10, 15], 0) - 0 - >>> exponential_search([0, 5, 7, 10, 15], 15) - 4 - >>> exponential_search([0, 5, 7, 10, 15], 5) - 1 - >>> exponential_search([0, 5, 7, 10, 15], 6) - -1 - """ - if list(sorted_collection) != sorted(sorted_collection): - raise ValueError("sorted_collection must be sorted in ascending order") - - if sorted_collection[0] == item: + if arr[0] == target: return 0 - bound = 1 - while bound < len(sorted_collection) and sorted_collection[bound] < item: - bound *= 2 + # Find range for binary search by repeated doubling + index = 1 + while index < len(arr) and arr[index] <= target: + index *= 2 - left = bound // 2 - right = min(bound, len(sorted_collection) - 1) - return binary_search_by_recursion(sorted_collection, item, left, right) + # Perform binary search in the found range + return binary_search(arr, target, index // 2, min(index, len(arr)-1)) -if __name__ == "__main__": - import doctest +def binary_search(arr, target, left, right): + while left <= right: + mid = (left + right) // 2 + if arr[mid] == target: + return mid + elif arr[mid] < target: + left = mid + 1 + else: + right = mid - 1 + return -1 - doctest.testmod() - # Manual testing - user_input = input("Enter numbers separated by commas: ").strip() - collection = sorted(int(item) for item in user_input.split(",")) - target = int(input("Enter a number to search for: ")) - result = exponential_search(sorted_collection=collection, item=target) - if result == -1: - print(f"{target} was not found in {collection}.") - else: - print(f"{target} was found at index {result} in {collection}.") +# Example usage: +if __name__ == "__main__": + array = [1, 3, 5, 7, 9, 13, 17, 21, 24, 27, 30] + target = 13 + result = exponential_search(array, target) + print(f"Target {target} found at index: {result}") From 213ff5510084ea6537fcfc0f66e741070a27398b Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 29 Mar 2025 23:46:14 +0000 Subject: [PATCH 02/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- searches/exponential_search.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/searches/exponential_search.py b/searches/exponential_search.py index 47e66e4390be..79e3e80ed059 100644 --- a/searches/exponential_search.py +++ b/searches/exponential_search.py @@ -11,6 +11,7 @@ Author: Michael Alexander Montoya """ + def exponential_search(arr, target): if len(arr) == 0: return -1 @@ -24,7 +25,7 @@ def exponential_search(arr, target): index *= 2 # Perform binary search in the found range - return binary_search(arr, target, index // 2, min(index, len(arr)-1)) + return binary_search(arr, target, index // 2, min(index, len(arr) - 1)) def binary_search(arr, target, left, right): From 8ed3e474742af652534f9d1d941156f84d38ac9b Mon Sep 17 00:00:00 2001 From: cureprotocols Date: Sat, 29 Mar 2025 18:11:10 -0600 Subject: [PATCH 03/10] Add Reservoir Sampling algorithm for streaming data --- searches/reservoir_sampling.py | 48 ++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 searches/reservoir_sampling.py diff --git a/searches/reservoir_sampling.py b/searches/reservoir_sampling.py new file mode 100644 index 000000000000..74c3ac272069 --- /dev/null +++ b/searches/reservoir_sampling.py @@ -0,0 +1,48 @@ +""" +Reservoir Sampling Algorithm + +Use Case: +Efficient for selecting k random items from a data stream of unknown size, +or when the entire dataset cannot fit into memory. + +Time Complexity: +- O(n), where n is the total number of items +- Space Complexity: O(k) + +Author: Michael Alexander Montoya +""" + +import random + +def reservoir_sampling(stream, k): + """ + Performs reservoir sampling on a stream of items. + + Args: + stream: An iterable data stream. + k: Number of items to sample. + + Returns: + A list containing k randomly sampled items from the stream. + """ + + reservoir = [] + + for i, item in enumerate(stream): + if i < k: + reservoir.append(item) + else: + j = random.randint(0, i) + if j < k: + reservoir[j] = item + + return reservoir + + +# Example usage +if __name__ == "__main__": + stream_data = range(1, 1001) # Simulate a stream of numbers from 1 to 1000 + sample_size = 10 + + sample = reservoir_sampling(stream_data, sample_size) + print(f"Random sample of {sample_size} items from stream: {sample}") From e2900a0379a42492d58b8837ef6af29858594a1a Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 30 Mar 2025 00:12:44 +0000 Subject: [PATCH 04/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- searches/reservoir_sampling.py | 1 + 1 file changed, 1 insertion(+) diff --git a/searches/reservoir_sampling.py b/searches/reservoir_sampling.py index 74c3ac272069..807fc03fd84c 100644 --- a/searches/reservoir_sampling.py +++ b/searches/reservoir_sampling.py @@ -14,6 +14,7 @@ import random + def reservoir_sampling(stream, k): """ Performs reservoir sampling on a stream of items. From 65a42bb7e768f4add8e068220f712be0317a349a Mon Sep 17 00:00:00 2001 From: cureprotocols Date: Sat, 29 Mar 2025 18:23:31 -0600 Subject: [PATCH 05/10] Add Union-Find (Disjoint Set) with path compression --- data_structures/disjoint_set/union_find.py | 56 ++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 data_structures/disjoint_set/union_find.py diff --git a/data_structures/disjoint_set/union_find.py b/data_structures/disjoint_set/union_find.py new file mode 100644 index 000000000000..c76c4783e405 --- /dev/null +++ b/data_structures/disjoint_set/union_find.py @@ -0,0 +1,56 @@ +""" +Union-Find (Disjoint Set Union) with Path Compression and Union by Rank + +Use Case: +- Efficient structure to manage disjoint sets +- Useful in network connectivity, Kruskal's MST, and clustering + +Time Complexity: +- Nearly constant: O(α(n)) where α is the inverse Ackermann function + +Author: Michael Alexander Montoya +""" + +class UnionFind: + def __init__(self, size): + self.parent = list(range(size)) + self.rank = [0] * size + + def find(self, node): + if self.parent[node] != node: + self.parent[node] = self.find(self.parent[node]) # Path compression + return self.parent[node] + + def union(self, x, y): + rootX = self.find(x) + rootY = self.find(y) + + if rootX == rootY: + return False # Already connected + + # Union by rank + if self.rank[rootX] < self.rank[rootY]: + self.parent[rootX] = rootY + elif self.rank[rootX] > self.rank[rootY]: + self.parent[rootY] = rootX + else: + self.parent[rootY] = rootX + self.rank[rootX] += 1 + + return True + + +# Example usage +if __name__ == "__main__": + uf = UnionFind(10) + + uf.union(1, 2) + uf.union(2, 3) + uf.union(4, 5) + + print("1 and 3 connected:", uf.find(1) == uf.find(3)) # True + print("1 and 5 connected:", uf.find(1) == uf.find(5)) # False + + uf.union(3, 5) + + print("1 and 5 connected after union:", uf.find(1) == uf.find(5)) # True From be82ae40301e9bedbfb27a55436ad04bf918c347 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 30 Mar 2025 00:24:49 +0000 Subject: [PATCH 06/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- data_structures/disjoint_set/union_find.py | 1 + 1 file changed, 1 insertion(+) diff --git a/data_structures/disjoint_set/union_find.py b/data_structures/disjoint_set/union_find.py index c76c4783e405..abb621a8824b 100644 --- a/data_structures/disjoint_set/union_find.py +++ b/data_structures/disjoint_set/union_find.py @@ -11,6 +11,7 @@ Author: Michael Alexander Montoya """ + class UnionFind: def __init__(self, size): self.parent = list(range(size)) From 563681b02710ec6f35c8bb18f573a318d46c852a Mon Sep 17 00:00:00 2001 From: cureprotocols Date: Sat, 29 Mar 2025 18:31:09 -0600 Subject: [PATCH 07/10] Refactor: add type hints, doctests, and naming fixes for bot compliance --- data_structures/disjoint_set/union_find.py | 56 ++++++++++++++++------ searches/reservoir_sampling.py | 31 +++++++----- 2 files changed, 59 insertions(+), 28 deletions(-) diff --git a/data_structures/disjoint_set/union_find.py b/data_structures/disjoint_set/union_find.py index abb621a8824b..da97ed12bd12 100644 --- a/data_structures/disjoint_set/union_find.py +++ b/data_structures/disjoint_set/union_find.py @@ -13,38 +13,64 @@ class UnionFind: - def __init__(self, size): + def __init__(self, size: int) -> None: + """ + Initializes a Union-Find data structure with `size` elements. + + >>> uf = UnionFind(5) + >>> uf.find(0) + 0 + """ self.parent = list(range(size)) self.rank = [0] * size - def find(self, node): + def find(self, node: int) -> int: + """ + Finds the representative/root of the set that `node` belongs to. + + >>> uf = UnionFind(5) + >>> uf.find(3) + 3 + """ if self.parent[node] != node: self.parent[node] = self.find(self.parent[node]) # Path compression return self.parent[node] - def union(self, x, y): - rootX = self.find(x) - rootY = self.find(y) - - if rootX == rootY: + def union(self, a: int, b: int) -> bool: + """ + Unites the sets that contain elements `a` and `b`. + + >>> uf = UnionFind(5) + >>> uf.union(0, 1) + True + >>> uf.find(1) == uf.find(0) + True + >>> uf.union(0, 1) + False + """ + root_a = self.find(a) + root_b = self.find(b) + + if root_a == root_b: return False # Already connected # Union by rank - if self.rank[rootX] < self.rank[rootY]: - self.parent[rootX] = rootY - elif self.rank[rootX] > self.rank[rootY]: - self.parent[rootY] = rootX + if self.rank[root_a] < self.rank[root_b]: + self.parent[root_a] = root_b + elif self.rank[root_a] > self.rank[root_b]: + self.parent[root_b] = root_a else: - self.parent[rootY] = rootX - self.rank[rootX] += 1 + self.parent[root_b] = root_a + self.rank[root_a] += 1 return True -# Example usage if __name__ == "__main__": - uf = UnionFind(10) + import doctest + doctest.testmod() + uf = UnionFind(10) uf.union(1, 2) uf.union(2, 3) uf.union(4, 5) diff --git a/searches/reservoir_sampling.py b/searches/reservoir_sampling.py index 807fc03fd84c..e6e6d2c2b0f8 100644 --- a/searches/reservoir_sampling.py +++ b/searches/reservoir_sampling.py @@ -2,48 +2,53 @@ Reservoir Sampling Algorithm Use Case: -Efficient for selecting k random items from a data stream of unknown size, +Efficient for selecting `sample_size` random items from a data stream of unknown size, or when the entire dataset cannot fit into memory. Time Complexity: - O(n), where n is the total number of items -- Space Complexity: O(k) +- Space Complexity: O(sample_size) Author: Michael Alexander Montoya """ import random +from typing import Iterable -def reservoir_sampling(stream, k): +def reservoir_sampling(stream: Iterable[int], sample_size: int) -> list[int]: """ Performs reservoir sampling on a stream of items. Args: stream: An iterable data stream. - k: Number of items to sample. + sample_size: Number of items to sample. Returns: - A list containing k randomly sampled items from the stream. - """ + A list containing `sample_size` randomly sampled items from the stream. + >>> result = reservoir_sampling(range(1, 1001), 10) + >>> len(result) == 10 + True + """ reservoir = [] for i, item in enumerate(stream): - if i < k: + if i < sample_size: reservoir.append(item) else: j = random.randint(0, i) - if j < k: + if j < sample_size: reservoir[j] = item return reservoir -# Example usage if __name__ == "__main__": - stream_data = range(1, 1001) # Simulate a stream of numbers from 1 to 1000 - sample_size = 10 + import doctest + + doctest.testmod() - sample = reservoir_sampling(stream_data, sample_size) - print(f"Random sample of {sample_size} items from stream: {sample}") + stream_data = range(1, 1001) + sample = reservoir_sampling(stream_data, 10) + print(f"Sampled items: {sample}") From 8991be09fc6c34f32db120c89012600481f595dc Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 30 Mar 2025 00:32:21 +0000 Subject: [PATCH 08/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- data_structures/disjoint_set/union_find.py | 1 + 1 file changed, 1 insertion(+) diff --git a/data_structures/disjoint_set/union_find.py b/data_structures/disjoint_set/union_find.py index da97ed12bd12..3e258a19f5f4 100644 --- a/data_structures/disjoint_set/union_find.py +++ b/data_structures/disjoint_set/union_find.py @@ -68,6 +68,7 @@ def union(self, a: int, b: int) -> bool: if __name__ == "__main__": import doctest + doctest.testmod() uf = UnionFind(10) From 9563b1cf16336c33dd036dcc9d74d067d9e5276c Mon Sep 17 00:00:00 2001 From: cureprotocols Date: Sat, 29 Mar 2025 18:36:24 -0600 Subject: [PATCH 09/10] Refactor: renamed union() parameters for descriptive clarity --- data_structures/disjoint_set/union_find.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/data_structures/disjoint_set/union_find.py b/data_structures/disjoint_set/union_find.py index 3e258a19f5f4..fed0026badae 100644 --- a/data_structures/disjoint_set/union_find.py +++ b/data_structures/disjoint_set/union_find.py @@ -11,7 +11,6 @@ Author: Michael Alexander Montoya """ - class UnionFind: def __init__(self, size: int) -> None: """ @@ -36,9 +35,9 @@ def find(self, node: int) -> int: self.parent[node] = self.find(self.parent[node]) # Path compression return self.parent[node] - def union(self, a: int, b: int) -> bool: + def union(self, node_a: int, node_b: int) -> bool: """ - Unites the sets that contain elements `a` and `b`. + Unites the sets that contain elements `node_a` and `node_b`. >>> uf = UnionFind(5) >>> uf.union(0, 1) @@ -48,13 +47,12 @@ def union(self, a: int, b: int) -> bool: >>> uf.union(0, 1) False """ - root_a = self.find(a) - root_b = self.find(b) + root_a = self.find(node_a) + root_b = self.find(node_b) if root_a == root_b: return False # Already connected - # Union by rank if self.rank[root_a] < self.rank[root_b]: self.parent[root_a] = root_b elif self.rank[root_a] > self.rank[root_b]: From 8be9d1add38e07132c1fd781cd7e0cca7f5afb62 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 30 Mar 2025 00:37:50 +0000 Subject: [PATCH 10/10] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- data_structures/disjoint_set/union_find.py | 1 + 1 file changed, 1 insertion(+) diff --git a/data_structures/disjoint_set/union_find.py b/data_structures/disjoint_set/union_find.py index fed0026badae..12e2c83fe942 100644 --- a/data_structures/disjoint_set/union_find.py +++ b/data_structures/disjoint_set/union_find.py @@ -11,6 +11,7 @@ Author: Michael Alexander Montoya """ + class UnionFind: def __init__(self, size: int) -> None: """