From d01baec08fe509895da7985be0ffbd4367c1cf4f Mon Sep 17 00:00:00 2001 From: patx Date: Fri, 10 Jan 2025 08:16:03 -0500 Subject: [PATCH] MAJOR RELEASE: VERSION 1.0. BREAKS BACKWARD COMPATABILITYgit add . SEE README.md FOR COMPLETE CHANGELOG --- README.md | 309 +++++++++++++---------- docs/.commands.html.swp | Bin 1024 -> 0 bytes docs/commands.html | 131 ++++++---- docs/index.html | 126 +++++----- pickledb.py | 534 ++++++++-------------------------------- pkldb/README.md | 169 ------------- pkldb/pkldb.py | 170 ------------- pkldb/pkldb_tests.py | 60 ----- setup.py | 45 ++-- tests.py | 199 ++++++++------- 10 files changed, 553 insertions(+), 1190 deletions(-) delete mode 100644 docs/.commands.html.swp delete mode 100644 pkldb/README.md delete mode 100644 pkldb/pkldb.py delete mode 100644 pkldb/pkldb_tests.py diff --git a/README.md b/README.md index e9075ff..bd6ea2a 100644 --- a/README.md +++ b/README.md @@ -1,196 +1,249 @@ -![Download badge](http://pepy.tech/badge/pickledb) +# **pickleDB: Your Lightweight, High-Speed Key-Value Store** -# pickleDB -pickleDB is lightweight, fast, and simple database based on the -[json](https://docs.python.org/3/library/json.html) module. -And it's BSD licensed! +## **Fast. Simple. Reliable.** +Unlock the power of effortless data storage with **pickleDB**—the no-fuss, blazing-fast key-value store designed for Python developers. Whether you're building a small script or a performant microservice, pickleDB delivers simplicity and speed with the reliability you can count on. +--- -## pickleDB is Fun -```python ->>> import pickledb +## **Why Choose pickleDB?** ->>> db = pickledb.load('test.db', False) +### ✅ **Blazing Speed** +Backed by the high-performance [orjson](https://pypi.org/project/orjson/) library, pickleDB handles millions of records with ease. Perfect for applications where every millisecond counts. 
->>> db.set('key', 'value') +### ✅ **Ridiculously Easy to Use** +With its minimalist API, pickleDB makes adding, retrieving, and managing your data as simple as writing a Python list. No steep learning curves. No unnecessary complexity. ->>> db.get('key') -'value' +### ✅ **Rock-Solid Reliability** +Your data deserves to be safe. Atomic saves ensure your database remains consistent—even if something goes wrong. ->>> db.dump() -True -``` +### ✅ **Pythonic Flexibility** +Store strings, lists, dictionaries, and more—all with native Python operations. No need to learn special commands. If you know Python, you already know pickleDB. -## Installation -- Just add the `pickledb.py` file to your working directory or use `pip install pickledb`. -- pickleDB also includes a simplified and faster version using orjson called `pkldb.py`, to use this add it to your working directory and note the slight difference in setup `from pkldb.py import pkldb` then `db = pkldb('example.json')` +--- -# PickleDB Documentation +## **Getting Started** -## Introduction -PickleDB is a lightweight, file-based key-value store with optional support for time-to-live (TTL). It provides a simple and intuitive API for storing and managing data persistently. +### **Install in Seconds** +pickleDB is available on PyPI. Get started with just one command: +```bash +pip install pickledb +``` ---- +### **Your First pickleDB** +```python +from pickledb import PickleDB -## Table of Contents -1. **Basic Usage** -2. **Key-Value Methods** -3. **List Methods** -4. **Dictionary Methods** -5. **Enhanced Features** +# Initialize the database +db = PickleDB('my_database.db') ---- +# Add a key-value pair +db.set('greeting', 'Hello, world!') -## 1. Basic Usage -```python -from pickledb_enhanced import load +# Retrieve the value +print(db.get('greeting')) # Output: Hello, world! 
-db = load('mydb.json', auto_dump=True, enable_ttl=True) +# Save the data to disk +db.save() ``` -- `auto_dump`: Automatically save changes to the file. -- `enable_ttl`: Enable TTL support for expiring keys. +It’s that simple! In just a few lines, you have a fully functioning key-value store. --- -## 2. Key-Value Methods +## **More Examples to Get You Inspired** -### `set(key, value, ttl=None)` -Set a key-value pair in the database. - -### `get(key)` -Retrieve the value associated with a key. +### **Store and Retrieve Complex Data** +PickleDB works seamlessly with Python data structures. Example: +```python +# Store a dictionary +db.set('user', {'name': 'Alice', 'age': 30, 'city': 'Wonderland'}) -### `exists(key)` -Check if a key exists. +# Retrieve and update it +user = db.get('user') +user['age'] += 1 -### `rem(key)` -Remove a key from the database. +# Save the updated data +db.set('user', user) +print(db.get('user')) # Output: {'name': 'Alice', 'age': 31, 'city': 'Wonderland'} +``` -### `getall()` -Get all keys in the database. +### **Use Lists for Dynamic Data** +Handle lists with ease: +```python +# Add a list of items +db.set('tasks', ['Write code', 'Test app', 'Deploy']) -### `clear()` -Clear all keys. +# Retrieve and modify +tasks = db.get('tasks') +tasks.append('Celebrate') +db.set('tasks', tasks) -### `deldb()` -Delete the database file. +print(db.get('tasks')) # Output: ['Write code', 'Test app', 'Deploy', 'Celebrate'] +``` ---- +### **Store Configurations** +Create a simple, persistent configuration store: +```python +# Set configuration options +db.set('config', {'theme': 'dark', 'notifications': True}) + +# Access and update settings +config = db.get('config') +config['notifications'] = False +db.set('config', config) +print(db.get('config')) # Output: {'theme': 'dark', 'notifications': False} +``` -## 3. 
List Methods +### **Session Management** +Track user sessions effortlessly: +```python +# Add session data +db.set('session_12345', {'user_id': 1, 'status': 'active'}) -### `lcreate(name)` -Create a new list in the database. +# End a session +session = db.get('session_12345') +session['status'] = 'inactive' +db.set('session_12345', session) -### `ladd(name, value)` -Add a value to an existing list. +print(db.get('session_12345')) # Output: {'user_id': 1, 'status': 'inactive'} +``` -### `lgetall(name)` -Retrieve all values from a list. +--- -### `lsort(name, reverse=False)` -Sort a list in ascending or descending order. -- `reverse`: Sort in descending order if `True`. +## **Performance Highlights** -### `lremove(name, value)` -Remove a value from a list. +pickleDB demonstrates strong performance for handling large-sized datasets: -### `lgetrange(name, start, end)` -Retrieve a range of values from a list. -- `start`: Start index. -- `end`: End index. +| Entries | Memory Load Time | Retrieval Time | Save Time | +|--------------|------------------|----------------|-----------| +| **1M** | 1.21 sec | 0.90 sec | 0.17 sec | +| **10M** | 14.11 sec | 10.30 sec | 1.67 sec | +| **50M** | 93.79 sec | 136.42 sec | 61.08 sec | -### `llen(name)` -Get the length of a list. +Tests were performed on a StarLabs StarLite Mk IV (Quad-Core Intel® Pentium® Silver N5030 CPU @ 1.10GHz w/ 8GB memory) running elementary OS 7.1 Horus. --- -## 4. Dictionary Methods +## **Minimal, Powerful API** + +pickleDB offers a clean and Pythonic API for managing data efficiently: -### `dcreate(name)` -Create a new dictionary in the database. +### **`set(key, value)`** +Add or update a key-value pair: +```python +# Add a new key-value pair +db.set('username', 'admin') -### `dadd(name, key, value)` -Add a key-value pair to a dictionary. 
+# Update an existing key-value pair +db.set('username', 'superadmin') +print(db.get('username')) # Output: 'superadmin' +``` -### `dget(name, key)` -Retrieve a value from a dictionary. +### **`get(key)`** +Retrieve the value associated with a key: +```python +# Get the value for a key +print(db.get('username')) # Output: 'superadmin' -### `dgetall(name)` -Retrieve all key-value pairs from a dictionary. +# Attempt to retrieve a non-existent key +print(db.get('nonexistent_key')) # Output: None +``` -### `dremove(name, key)` -Remove a key from a dictionary. +### **`all()`** +Get a list of all keys: +```python +# Add multiple keys +db.set('item1', 'value1') +db.set('item2', 'value2') -### `dmerge(name, other_dict)` -Merge another dictionary into an existing dictionary. +# Retrieve all keys +print(db.all()) # Output: ['username', 'item1', 'item2'] +``` -### `dkeys(name)` -Get all keys from a dictionary. +### **`remove(key)`** +Delete a key and its value: +```python +# Remove a key-value pair +db.remove('item1') +print(db.all()) # Output: ['username', 'item2'] +``` -### `dvalues(name)` -Get all values from a dictionary. +### **`purge()`** +Clear all data in the database: +```python +# Clear the database +db.purge() +print(db.all()) # Output: [] +``` + +### **`save()`** +Persist the database to disk: +```python +# Save the current state of the database +db.save() +print("Database saved successfully!") +``` --- -## 5. Enhanced Features +## **Key Improvements in Version 1.0** -### **TTL Support** -- Expire keys automatically after a given time. +pickleDB 1.0 is a reimagined version designed for speed, simplicity, and reliability. Key changes include: -### **File Compression** -- Compress the database file to save space. +- **Atomic Saves**: Ensures data integrity during writes, eliminating potential corruption issues. +- **Faster Serialization**: Switched to `orjson` for significantly improved speed. 
+- **Streamlined API**: Removed legacy methods (e.g., `ladd`, `dmerge`) in favor of native Python operations. +- **Unified Handling of Data Types**: Treats all Python-native types (lists, dicts, etc.) as first-class citizens. +- **Explicit Saves**: The `auto_save` feature was removed to provide users greater control and optimize performance. -### **Automatic Persistence** -- Save changes automatically using `auto_dump`. +If backward compatibility is essential, version 0.9 is still available: +- View the legacy code [here](https://gist.github.com/patx/3ad47fc3814d7293feb902f6ab49c48f). +- Install it by: + ```bash + pip uninstall pickledb + ``` + Then download the legacy file and include it in your project. --- -## Example Usage +## **Limitations** -### **Working with Lists** -```python -# Create a list and add values -db.lcreate('mylist') -db.ladd('mylist', 'item1') -db.ladd('mylist', 'item2') +While pickleDB is powerful, it’s important to understand its limitations: -# Sort the list -db.lsort('mylist') # ['item1', 'item2'] +- **Memory Usage**: The entire dataset is loaded into memory, which might be a constraint on systems with limited RAM for extremely large datasets. +- **Single-Threaded**: The program is not thread-safe. For concurrent access, use external synchronization like Python's `RLock()`. +- **Blocking Saves**: Saves are blocking by default. To achieve non-blocking saves, use asynchronous wrappers. +- **Lack of Advanced Features**: pickleDB is designed for simplicity, so it may not meet the needs of applications requiring advanced database features. -# Get a range of values -db.lgetrange('mylist', 0, 1) # ['item1'] +For projects requiring more robust solutions, consider alternatives like **[kenobiDB](Https://github.com/patx/kenobi)**, [Redis](http://redis.io/), [SQLite](https://www.sqlite.org/), or [MongoDB](https://www.mongodb.com/). 
-# Remove an item -db.lremove('mylist', 'item1') -``` +--- -### **Working with Dictionaries** -```python -# Create a dictionary and add values -db.dcreate('mydict') -db.dadd('mydict', 'key1', 'value1') -db.dadd('mydict', 'key2', 'value2') +## **Asynchronous Saves** +Want non-blocking saves? You can implement an async wrapper to handle saves in the background. This is particularly useful for applications that need high responsiveness without delaying due to disk operations, like small web applications. Check out examples [here](https://gist.github.com/patx/5c12d495ff142f3262325eeae81eb000). -# Merge another dictionary -db.dmerge('mydict', {'key3': 'value3'}) +--- -# Get all keys and values -db.dkeys('mydict') # ['key1', 'key2', 'key3'] -db.dvalues('mydict') # ['value1', 'value2', 'value3'] +## **Community & Contributions** -# Remove a key -db.dremove('mydict', 'key1') -``` +### **Join the Community** +We’re passionate about making pickleDB better every day. Got ideas, feedback, or an issue to report? Let’s connect: +- **File an Issue**: [GitHub Issues](https://github.com/patx/pickledb/issues) +- **Ask Questions**: Reach out to our growing community of users and developers. ---- +### **Contribute to pickleDB** +Want to leave your mark? Help us make pickleDB even better: +- **Submit a Pull Request**: Whether it's fixing a bug, improving the documentation, or adding a feature, we’d love your contributions. +- **Suggest New Features**: Share your ideas to make pickleDB more powerful. -## Notes -- Always ensure proper file permissions for the database file. -- Use thread-safe practices when accessing the database concurrently. +Together, we can build a better tool for everyone. --- -## Changelog -- **Enhanced Features**: Added methods for list sorting, removal, range fetching, and dictionary merging. 
+## **Documentation** + +Explore the full capabilities of pickleDB with our detailed documentation: +- **API Reference**: [Commands and Examples](https://patx.github.io/pickledb/commands.html) +- [GitHub Repository](https://github.com/patx/pickledb) +- [Installation Details (PyPI)](http://pypi.python.org/pypi/pickleDB) +Whether you're a beginner or an experienced developer, these resources will guide you through everything pickleDB has to offer. diff --git a/docs/.commands.html.swp b/docs/.commands.html.swp deleted file mode 100644 index 5a930dc62074d13cae21ae5445681b3e52a2dd5f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 1024 zcmYc?$V<%2S1{7E)H7y40?~pD3>k? - + + - - - -pickleDB - simple key-value database - + + + + + pickleDB API Documentation + - -
-

Current Commands

-

LOAD path auto_dump → Load a database from a path with auto_dump enabled or not (available since 0.1)

-

SET key value → Set the value of a str key (available since 0.1)

-

GET key → Get the value of a key (available since 0.1)

-

GETALL → Return a list of all keys in database (available since 0.4)

-

REM key → Delete a key (available since 0.1)

-

APPEND key more → Add more to a key's value (available since 0.1.3)

-

EXISTS key → Determine if a key exists (available since 0.7.2)

-

TOTALKEYS name → Get the total number of keys in whole database or in a specified (name) dict or list (available since 0.7.3)

-

LCREATE name → Create a list with str name (available since 0.1)

-

LADD name value → Add a value to a list (available since 0.1)

-

LGETALL name → Return all values in a list (available since 0.1)

-

LEXTEND name seq → Extend a list with a sequence (available since 0.6)

-

LGET name pos → Return one value in a list (available since 0.1)

-

LRANGE name start end → Return all the values from a given range in a list

-

LREMLIST name → Remove a list and all of its values (available since 0.1)

-

LREMVALUE name value → Remove a value from list name (available since 0.8.2)

-

LPOP name pos → Remove one value in a list (available since 0.1)

-

LLEN name → Return the length of a list (available since 0.6)

-

LAPPEND name pos more → Add more to a value in a list (available since 0.1.3)

-

LEXISTS name value → Determine if a value is in a certain list (available since 0.7.2)

-

DCREATE name → Create a dict with str name (available since 0.2.2)

-

DADD name pair → Add a key-value pair to a dict, pair is a tuple (available since 0.2.2)

-

DGETALL name → Return all key-value pairs from a dict (available since 0.2.2)

-

DGET name key → Return the value for a key in a dict (available since 0.2.2)

-

DKEYS name → Return all the keys for a dict (available since 0.6)

-

DVALS name → Return all the values for a dict (available) since 0.6)

-

DEXISTS name key → Determine if a key exists (available since 0.6)

-

DREM name → Remove a dict and all of its pairs (available since 0.2.2)

-

DPOP name key → Remove one key-value in a dict (available since 0.2.2)

-

DMERGE name1 name2 name3 → Merge name1 and name2 into a new dict: name3 (available since 0.7.3) -

DELDB → Delete everything from the database (available since 0.2.1)

-

DUMP → Save the database from memory to a file specified in LOAD (available since 0.3)

-

Suggestions

-

If you would like to suggest a command, you can create an issue on GitHub.

-
+ +
+

API Documentation

+

For examples and help on getting started/installation see the README on GitHub.

+ +

+ Class Initialization +

+ +

PickleDB(path)

+

Initialize a PickleDB instance with the specified path.

+
    +
  • path: The path to the database file.
  • +
+ +

+ PickleDB Class Methods +

+ +

set(key, value) → Add or update a key-value pair in the database.

+
    +
  • key: The key to set. Converted to string if not already.
  • +
  • value: The value to associate with the key. This can be any JSON serializable Python data type.
  • +
  • Returns: True.
  • +
+ +

get(key) → Retrieve the value associated with a key.

+
    +
  • key: The key to retrieve.
  • +
  • Returns: The value associated with the key, or None if the key does not exist.
  • +
+ +

all() → Retrieve a list of all keys in the database.

+
    +
  • Returns: A list of keys.
  • +
+ +

remove(key) → Delete a key and its value from the database.

+
    +
  • key: The key to delete.
  • +
  • Returns: True if the key was deleted, or False if the key does not exist.
  • +
+ +

purge() → Clear all keys and values from the database.

+
    +
  • Returns: True.
  • +
+ +

save() → Save the current state of the database to the file.

+
    +
  • Returns: True if the operation succeeds, or False otherwise.
  • +
  • For larger datasets consider using a non-blocking wrapper around this method like this. +
+ +

Suggestions

+

If you would like to suggest an improvement or report an issue, please create an issue on GitHub.

+
diff --git a/docs/index.html b/docs/index.html index 7c93c50..3187522 100644 --- a/docs/index.html +++ b/docs/index.html @@ -1,66 +1,82 @@ - - + + - - - -pickleDB - simple key-value database - - + + + + + pickleDB - Simple Key-Value Database + - -
-

Welcome

-pickleDB is a lightweight and simple key-value store. -It is built upon Python's json -module and was inspired by redis. It is licensed -with the BSD three-clause license. -

pickleDB is Fun

- ->>> import pickledb -

->>> db = pickledb.load('example.db', False) -

+ +
+

Welcome

+

pickleDB is a lightweight, simple and fast key-value store. + It is built upon the orjson module for extremely high performance and was inspired by redis. It is licensed under the BSD three-clause license.

+ +

pickleDB is Fun

+

+>>> from pickledb import PickleDB
+
+>>> db = PickleDB('example.json')
+
 >>> db.set('key', 'value')
-
True -

+ >>> db.get('key') -
'value' -

->>> db.dump() -
+ +>>> db.save() True -
-

And Easy to Install

- +
+ +

And Easy to Install

+

 $ pip install pickledb
-
-

More Information

-You can view all of pickleDB's commands and what they do here.
-
-pickleDB was written by Harrison Erd. If you would like -to file an issue report or fork the project, -check out the Github project page. -You can also take a look at pickleDB on PyPI.
-
-pickleDB got its name from Python's "pickle" module, which it previously used. -However, now pickleDB uses the "json" module. It is faster and cleaner. -But the name stuck! -
-
-
-Fork me on GitHub -
+
+ +

More Information

+

You can view all of pickleDB's commands and what they do here.

+

pickleDB was written by Harrison Erd. If you would like to file an issue report or fork the project, check out the GitHub project page. You can also take a look at pickleDB on PyPI.

+ + + Fork me on GitHub + +
+ diff --git a/pickledb.py b/pickledb.py index a5bcb97..7fb1b93 100644 --- a/pickledb.py +++ b/pickledb.py @@ -1,500 +1,160 @@ +""" +Copyright Harrison Erd + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, +this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, +this list of conditions and the following disclaimer in the documentation +and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from this +software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, +EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +""" import os -import sys -import signal -import shutil -import json -import gzip -from tempfile import NamedTemporaryFile -from threading import RLock -from time import time - -def load(location, auto_dump=True, enable_ttl=False): - """ - Create and return a PickleDB object. - - Args: - location (str): Path to the JSON file. 
- auto_dump (bool): If True, automatically save changes to the file. - enable_ttl (bool): If True, enable time-to-live (TTL) support for keys. - - Returns: - PickleDB: The initialized PickleDB object. - """ - return PickleDB(location, auto_dump, enable_ttl) - +import orjson class PickleDB: """ - A lightweight, file-based key-value store with optional TTL support. + A barebones orjson-based key-value store with essential methods: set, + get, save, remove, purge, and all. """ - def __init__(self, location, auto_dump=True, enable_ttl=False): + def __init__(self, location): """ - Initialize the PickleDB object. + Initialize the pkldb object. Args: location (str): Path to the JSON file. - auto_dump (bool): Automatically save changes to the file. - enable_ttl (bool): Enable TTL support for keys. """ self.location = os.path.expanduser(location) - self.auto_dump = auto_dump - self.enable_ttl = enable_ttl - self._lock = RLock() - self.db = {} - self.ttl = {} self._load() - self._set_signal_handler() - - def __getitem__(self, item): - '''Syntax sugar for get()''' - return self.get(item) - - def __setitem__(self, key, value): - '''Sytax sugar for set()''' - return self.set(key, value) - - def __delitem__(self, key): - '''Sytax sugar for rem()''' - return self.rem(key) - - def _set_signal_handler(self): - """Set up signal handler for graceful shutdown.""" - signal.signal(signal.SIGTERM, self._graceful_exit) - - def _graceful_exit(self, *args): - """Ensure any ongoing dump completes before exiting.""" - self.dump() - sys.exit(0) def _load(self): - """Load data from the JSON file.""" - if os.path.exists(self.location): + """ + Load data from the JSON file if it exists, or initialize an empty + database. 
+ """ + if (os.path.exists(self.location) and + os.path.getsize(self.location) > 0): try: - with open(self.location, 'rt') as f: - self.db = json.load(f) - except (ValueError, json.JSONDecodeError): + with open(self.location, 'rb') as f: + self.db = orjson.loads(f.read()) + print("Database loaded") + except Exception as e: self.db = {} + print(f"Failed to load database: {e}") else: self.db = {} + print("Database created") - def _dump(self): - """Dump the database to a temporary file and replace the original.""" - with NamedTemporaryFile(mode='wt', delete=False) as temp_file: - json.dump(self.db, temp_file) - os.replace(temp_file.name, self.location) # Atomic replace - - def dump(self): - """Force save the database to the file.""" - with self._lock: - self._dump() - - def _autodump(self): - """Automatically dump the database if auto_dump is enabled.""" - if self.auto_dump: - self.dump() - - def set(self, key, value, ttl=None): - """ - Set a key-value pair in the database. - - Args: - key (str): The key to set. - value (any): The value to associate with the key. - ttl (int, optional): Time-to-live in seconds. Defaults to None. - - Returns: - bool: True if the operation succeeds. + def save(self): """ - if not isinstance(key, str): - raise TypeError("Key must be a string.") - with self._lock: - self.db[key] = value - if ttl and self.enable_ttl: - self.ttl[key] = time() + ttl - self._autodump() - return True + Save the database to the file using an atomic save. - def get(self, key, default_value=None): - """ - Get the value associated with a key. - - Args: - key (str): The key to retrieve. + Behavior: + - Writes to a temporary file and replaces the + original file only after the write is successful, + ensuring data integrity. Returns: - any: The value associated with the key, or None if the key does not exist or has expired. + bool: True if save was successful, False if not. 
""" - with self._lock: - try: - if self.enable_ttl and key in self.ttl: - if time() > self.ttl[key]: - self.rem(key) - return None - return self.db.get(key) - except KeyError: - return default_value - - def exists(self, key): - """ - Check if a key exists in the database. - - Args: - key (str): The key to check. - - Returns: - bool: True if the key exists, False otherwise. - """ - return key in self.db - - def rem(self, key): - """ - Remove a key from the database. - - Args: - key (str): The key to remove. - - Returns: - bool: True if the key was removed, False if it did not exist. - """ - with self._lock: - if key in self.db: - del self.db[key] - if key in self.ttl: - del self.ttl[key] - self._autodump() - return True - return False - - def getall(self): - """ - Retrieve all keys in the database. - - Returns: - list: A list of all keys in the database. - """ - return list(self.db.keys()) - - def clear(self): - """ - Remove all keys from the database. - - Returns: - bool: True if the operation succeeds. - """ - with self._lock: - self.db.clear() - self.ttl.clear() - self._autodump() - return True - - def deldb(self): - """ - Delete the entire database. - - Returns: - bool: True if the operation succeeds. - """ - with self._lock: - self.db = {} - self.ttl = {} - if os.path.exists(self.location): - os.remove(self.location) - return True - - def compress(self): - """ - Compress the database file using gzip. - - Returns: - bool: True if the operation succeeds. 
- """ - with self._lock: - compressed_file = f"{self.location}.gz" - with open(self.location, 'rb') as f_in: - with gzip.open(compressed_file, 'wb') as f_out: - shutil.copyfileobj(f_in, f_out) - return True + temp_location = f"{self.location}.tmp" + try: + with open(temp_location, 'wb') as temp_file: + temp_file.write(orjson.dumps(self.db)) + os.replace(temp_location, self.location) # Atomic replace + return True + except Exception as e: + print(f"Failed to write database to disk: {e}") + return False - def append(self, key, value): + def set(self, key, value): """ - Append a value to an existing list. + Add or update a key-value pair in the database. Args: - key (str): The key of the list. - value (any): The value to append. + key (any): The key to set. If the key is not a string, it will be + converted to a string. + value (any): The value to associate with the key. Returns: bool: True if the operation succeeds. - """ - with self._lock: - if key not in self.db or not isinstance(self.db[key], list): - raise TypeError("Key must reference a list.") - self.db[key].append(value) - self._autodump() - return True - - def lcreate(self, name): - """ - Create a new list in the database. - Args: - name (str): The name of the list. - - Returns: - bool: True if the operation succeeds. + Behavior: + - If the key already exists, its value will be updated. + - If the key does not exist, it will be added to the database. """ - if not isinstance(name, str): - raise TypeError("List name must be a string.") - with self._lock: - if name in self.db: - raise ValueError("List already exists.") - self.db[name] = [] - self._autodump() + key = str(key) if not isinstance(key, str) else key + self.db[key] = value return True - def ladd(self, name, value): + def remove(self, key): """ - Add a value to an existing list. + Remove a key and its value from the database. Args: - name (str): The name of the list. - value (any): The value to add. + key (any): The key to delete. 
If the key is not a string, it will + be converted to a string. Returns: - bool: True if the operation succeeds. + bool: True if the key was deleted, False if the key does not exist. """ - with self._lock: - if name not in self.db or not isinstance(self.db[name], list): - raise TypeError("List does not exist or is not a valid list.") - self.db[name].append(value) - self._autodump() - return True - - def lgetall(self, name): - """ - Retrieve all values from a list. - - Args: - name (str): The name of the list. - - Returns: - list: All values in the list. - """ - with self._lock: - if name not in self.db or not isinstance(self.db[name], list): - raise TypeError("List does not exist or is not a valid list.") - return list(self.db[name]) - - def lsort(self, name, reverse=False): - """ - Sort a list in the database. - - Args: - name (str): The name of the list. - reverse (bool): Sort in descending order if True. - - Returns: - list: The sorted list. - """ - with self._lock: - if name not in self.db or not isinstance(self.db[name], list): - raise TypeError("List does not exist or is not a valid list.") - self.db[name].sort(reverse=reverse) - self._autodump() - return self.db[name] - - def lremove(self, name, value): - """ - Remove a value from a list. - - Args: - name (str): The name of the list. - value (any): The value to remove. - - Returns: - bool: True if the value was removed, False otherwise. - """ - with self._lock: - if name not in self.db or not isinstance(self.db[name], list): - raise TypeError("List does not exist or is not a valid list.") - try: - self.db[name].remove(value) - self._autodump() - return True - except ValueError: - return False - - def lgetrange(self, name, start, end): - """ - Get a range of values from a list. - - Args: - name (str): The name of the list. - start (int): The starting index. - end (int): The ending index. - - Returns: - list: The sublist from start to end. 
- """ - with self._lock: - if name not in self.db or not isinstance(self.db[name], list): - raise TypeError("List does not exist or is not a valid list.") - return self.db[name][start:end] - - def llen(self, name): - """ - Get the length of a list. - - Args: - name (str): The name of the list. - - Returns: - int: The length of the list. - """ - with self._lock: - if name not in self.db or not isinstance(self.db[name], list): - raise TypeError("List does not exist or is not a valid list.") - return len(self.db[name]) - - def dcreate(self, name): - """ - Create a new dictionary in the database. - - Args: - name (str): The name of the dictionary. - - Returns: - bool: True if the operation succeeds. - """ - if not isinstance(name, str): - raise TypeError("Dictionary name must be a string.") - with self._lock: - if name in self.db: - raise ValueError("Dictionary already exists.") - self.db[name] = {} - self._autodump() - return True + key = str(key) if not isinstance(key, str) else key + if key in self.db: + del self.db[key] + return True + return False - def dadd(self, name, key, value): + def purge(self): """ - Add a key-value pair to a dictionary. - - Args: - name (str): The name of the dictionary. - key (str): The key to add. - value (any): The value to associate with the key. + Clear all keys from the database. Returns: bool: True if the operation succeeds. """ - if not isinstance(key, str): - raise TypeError("Key must be a string.") - with self._lock: - if name not in self.db or not isinstance(self.db[name], dict): - raise TypeError("Dictionary does not exist or is not a valid dictionary.") - self.db[name][key] = value - self._autodump() + self.db.clear() return True - def dget(self, name, key): - """ - Retrieve a value from a dictionary. - - Args: - name (str): The name of the dictionary. - key (str): The key to retrieve. - - Returns: - any: The value associated with the key. 
- """ - with self._lock: - if name not in self.db or not isinstance(self.db[name], dict): - raise TypeError("Dictionary does not exist or is not a valid dictionary.") - return self.db[name].get(key) - - def dgetall(self, name): - """ - Retrieve all key-value pairs from a dictionary. - - Args: - name (str): The name of the dictionary. - - Returns: - dict: All key-value pairs in the dictionary. + def get(self, key): """ - with self._lock: - if name not in self.db or not isinstance(self.db[name], dict): - raise TypeError("Dictionary does not exist or is not a valid dictionary.") - return self.db[name] - - def dremove(self, name, key): - """ - Remove a key from a dictionary. - - Args: - name (str): The name of the dictionary. - key (str): The key to remove. - - Returns: - bool: True if the key was removed, False otherwise. - """ - with self._lock: - if name not in self.db or not isinstance(self.db[name], dict): - raise TypeError("Dictionary does not exist or is not a valid dictionary.") - if key in self.db[name]: - del self.db[name][key] - self._autodump() - return True - return False - - def dmerge(self, name, other_dict): - """ - Merge another dictionary into an existing dictionary. + Get the value associated with a key. Args: - name (str): The name of the dictionary. - other_dict (dict): The dictionary to merge. + key (any): The key to retrieve. If the key is not a string, it will + be converted to a string. Returns: - dict: The updated dictionary. + any: The value associated with the key, or None if the key does + not exist. 
""" - with self._lock: - if name not in self.db or not isinstance(self.db[name], dict): - raise TypeError("Dictionary does not exist or is not a valid dictionary.") - if not isinstance(other_dict, dict): - raise TypeError("Argument must be a dictionary.") - self.db[name].update(other_dict) - self._autodump() - return self.db[name] + key = str(key) if not isinstance(key, str) else key + return self.db.get(key) - def dkeys(self, name): + def all(self): """ - Get all keys from a dictionary. - - Args: - name (str): The name of the dictionary. + Get a list of all keys in the database. Returns: - list: A list of keys in the dictionary. - """ - with self._lock: - if name not in self.db or not isinstance(self.db[name], dict): - raise TypeError("Dictionary does not exist or is not a valid dictionary.") - return list(self.db[name].keys()) - - def dvalues(self, name): + list: A list of all keys. """ - Get all values from a dictionary. - - Args: - name (str): The name of the dictionary. + return list(self.db.keys()) - Returns: - list: A list of values in the dictionary. - """ - with self._lock: - if name not in self.db or not isinstance(self.db[name], dict): - raise TypeError("Dictionary does not exist or is not a valid dictionary.") - return list(self.db[name].values()) diff --git a/pkldb/README.md b/pkldb/README.md deleted file mode 100644 index c57dc2f..0000000 --- a/pkldb/README.md +++ /dev/null @@ -1,169 +0,0 @@ -# pkldb - -`pkldb` is a lightweight, high-performance, JSON-based key-value store designed to handle datasets of significant size while maintaining simplicity and speed. Built using the powerful `orjson` library, `pkldb` is perfect for developers who need an efficient, easy-to-use database for Python projects. This is a simplified version of pickleDB focusing on scalability with large datasets. - ---- - -## **Features** - -### 1. **High Performance** -- Blazing-fast operations for inserting, retrieving, and dumping data. 
-- Demonstrated ability to handle datasets up to **50 million key-value pairs** with predictable, linear performance scaling. - -### 2. **Ease of Use** -- Simple API with intuitive methods for common database operations: - - `set(key, value)` - Add or update key-value pairs. - - `get(key)` - Retrieve the value associated with a key. - - `remove(key)` - Delete a key-value pair. - - `purge()` - Clear the database. - - `all()` - Get a list of all keys. - - `dump()` - Persist data to disk. - -### 3. **Data Integrity** -- Atomic writes ensure the database remains consistent, even in the event of an error during disk operations. - -### 4. **Scalable Design** -- Efficient memory and disk utilization enable handling of massive datasets on modern hardware. - -### 5. **Configurable Auto Dumping** -- Enable or disable automatic saving of changes with the `auto_dump` parameter. - -### 6. **Lightweight and Portable** -- Stores data in a simple JSON file, making it easy to move and manage. - ---- - -## **Performance Highlights** - -The `pkldb` has been rigorously tested for datasets of various sizes, showcasing its impressive performance: - -### **Test Results** -| Entries | Memory Load Time | Retrieval Time | Dump Time | -|--------------|------------------|----------------|-----------| -| **1M** | 1.21s | 0.90s | 0.17s | -| **10M** | 14.11s | 10.30s | 1.67s | -| **20M** | 29.95s | 21.86s | 3.25s | -| **50M** | 93.79s | 136.42s | 61.08s | - -These results demonstrate `pkldb`'s capability to scale efficiently while maintaining excellent performance. - ---- - -## **Installation** -Download or clone this repository and include `pkldb.py` in your project. Then install `orjson`: - -```bash -pip install orjson -``` - -### Why `orjson`? -`orjson` is a fast and efficient JSON parser and serializer for Python. It is significantly faster than the built-in `json` module, enabling `pkldb` to achieve its high performance, especially when handling large datasets. 
- - ---- - -## **Usage** - -```python -from pkldb import pkldb - -# Initialize the database -mydb = pkldb("my_database.db", auto_dump=False) - -# Add key-value pairs -mydb.set("key1", "value1") -mydb.set("key2", 42) -mydb.set("key3", [1, 2, 3]) # Using a list as a value -mydb.set("key4", {"nested": "value"}) # Using a dictionary as a value - -# Retrieve a value -mydb.get("key1") # Output: value1 -mydb.get("key4") # Output: {'nested': 'value'} - -# List all keys -mydb.all() # Output: ["key1", "key2", "key3", "key4"] - -# Remove a key -mydb.remove("key1") -mydb.all() # Output: ["key2", "key3", "key4"] - -# Save the database to disk -mydb.dump() -print("Database saved to disk.") - -# Clear the database -mydb.purge() -mydb.all() # Output: [] -``` - ---- - -## **Comparison with Other Databases** - -| Feature | pkldb | Redis | SQLite | TinyDB | MongoDB | -|----------------------|-------------|--------------|--------------|-------------|-------------| -| **Storage Type** | JSON File | In-Memory | File-Based | JSON File | Document DB | -| **Data Model** | Key-Value | Key-Value | Relational | Key-Value | Document | -| **Persistence** | Yes | Optional | Yes | Yes | Yes | -| **Scalability** | Medium | High | Medium | Low | High | -| **Setup** | None | Server-Based | None | None | Server-Based| -| **Performance** | High | Very High | Medium | Low | High | -| **Dependencies** | Minimal | Moderate | Minimal | Minimal | High | -| **Concurrency** | Single-Threaded | Multi-Threaded | Single-Threaded | Single-Threaded | Multi-Threaded | -| **Use Case** | Lightweight and portable key-value store | High-performance caching | Local relational database | Lightweight JSON-based store | Scalable NoSQL solutions | - - -| Feature | pkldb | pickledb | KenobiDB | -|------------------------|---------------------------------|----------------------------------------------------------------------|------------------------------------| -| Database Type | Key-Value Store | Key-Value Store | 
Document-based Database | -| Persistence | Persistent | Persistent | Persistent | -| Thread Safety | No explicit thread safety | Thread-safe with RLock | Thread-safe with RLock | -| Data Storage Format | JSON using orjson | JSON using built-in json library | SQLite | -| Key Features | Atomic dump, auto_dump option | Optional TTL, auto_dump, compression | Async operations, document search | -| Supported Operations | set, get, dump, remove, purge, all | set, get, exists, remove, getall, clear, compress, append, lcreate, ladd, lgetall, lsort, lremove, lgetrange, llen, dcreate, dadd, dget, dgetall, dremove, dmerge, dkeys, dvalues | insert, remove, update, purge, search, find_any, find_all | -| TTL Support | No | Yes | No | -| Compression | No | Yes (gzip) | No | -| Dependencies | os, orjson | os, json, gzip, shutil, time | os, json, sqlite3, concurrent.futures | -| Performance Notes | Efficient up to ~20M entries. Scales linearly: ~30s load for 20M, ~1.7s dump for 10M. | Handles up to ~1M entries; performance drops sharply beyond this limit. | Handles up to ~10M entries; performance and SQLite overhead limit larger scales. | -| Scalability | Limited to local file system | Limited to local file system | Moderate scalability with SQLite | -| Backup/Restore | No built-in | No built-in | No built-in | -| License | BSD-3-Clause License | BSD-3-Clause License | BSD-3-Clause License | - - -### **Strengths** - -- **Speed**: Handles massive datasets with ease, outperforming many similar solutions. -- **Data Integrity**: Atomic dumps ensure your data is always safe. -- **Simplicity**: Minimal dependencies and an intuitive API make it beginner-friendly. -- **Portability**: JSON-based storage simplifies data sharing and management. - -### **Limitations** - -- **Memory Usage**: The entire dataset is loaded into memory, which might be a constraint on systems with limited RAM for extremely large datasets. -- **Single-Threaded**: The program is not thread-safe. 
For concurrent access, use external synchronization like: - ```python - import threading - - lock = threading.Lock() - - # Thread-safe operations - with lock: - mydb.set("thread-safe-key", "value") - ``` - ---- - -## **Contributing** - -Contributions are welcome! Feel free to open an issue or submit a pull request on GitHub. - ---- - -## **License** - -This project is licensed under the BSD License. See the `LICENSE` file for details. - ---- - -`pkldb` is your go-to choice for a simple, reliable, and fast key-value store. With proven performance at scale, it's the perfect tool for your next Python project. 🚀 - diff --git a/pkldb/pkldb.py b/pkldb/pkldb.py deleted file mode 100644 index 5adea95..0000000 --- a/pkldb/pkldb.py +++ /dev/null @@ -1,170 +0,0 @@ -""" -Copyright Harrison Erd - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, -this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, -this list of conditions and the following disclaimer in the documentation -and/or other materials provided with the distribution. - -3. Neither the name of the copyright holder nor the names of its -contributors may be used to endorse or promote products derived from this -software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS -IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, -THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR -CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; -OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR -OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, -EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -""" - -import os -import orjson - -class pkldb: - """ - A orjson-based key-value store with essential methods: set, get, dump, - delete, purge, and all. - """ - - def __init__(self, location, auto_dump=False): - """ - Initialize the pkldb object. - - Args: - location (str): Path to the JSON file. - auto_dump (bool): Automatically save changes to the file. - """ - self.location = os.path.expanduser(location) - self.auto_dump = auto_dump - self._load() - - def _load(self): - """ - Load data from the JSON file if it exists, or initialize an empty - database. - """ - if (os.path.exists(self.location) and - os.path.getsize(self.location) > 0): - try: - with open(self.location, 'rb') as f: - self.db = orjson.loads(f.read()) - print("Database loaded") - except Exception as e: - self.db = {} - print(f"{e}\nDatabase failed to load, empty database created") - else: - self.db = {} - print("Database created") - - def dump(self): - """ - Save the database to the file using an atomic dump. - - Behavior: - - Writes to a temporary file and replaces the - original file only after the write is successful, - ensuring data integrity. - - Returns: - bool: True if dump was successful, False if not. 
- """ - temp_location = f"{self.location}.tmp" - try: - with open(temp_location, 'wb') as temp_file: - temp_file.write(orjson.dumps(self.db)) - os.replace(temp_location, self.location) # Atomic replace - return True - except Exception as e: - print(f"Failed to write database to disk: {e}") - return False - - def set(self, key, value): - """ - Add or update a key-value pair in the database. - - Args: - key (any): The key to set. If the key is not a string, it will be - converted to a string. - value (any): The value to associate with the key. - - Returns: - bool: True if the operation succeeds. - - Behavior: - - If the key already exists, its value will be updated. - - If the key does not exist, it will be added to the database. - - Automatically dumps the database to disk if `auto_dump` - is enabled. - """ - key = str(key) if not isinstance(key, str) else key - self.db[key] = value - if self.auto_dump: - self.dump() - return True - - def remove(self, key): - """ - Remove a key and its value from the database. - - Args: - key (any): The key to delete. If the key is not a string, it will - be converted to a string. - - Returns: - bool: True if the key was deleted, False if the key does not exist. - """ - key = str(key) if not isinstance(key, str) else key - if key in self.db: - del self.db[key] - if self.auto_dump: - self.dump() - return True - return False - - def purge(self): - """ - Clear all keys from the database. - - Returns: - bool: True if the operation succeeds. - """ - self.db.clear() - if self.auto_dump: - self.dump() - return True - - def get(self, key): - """ - Get the value associated with a key. - - Args: - key (any): The key to retrieve. If the key is not a string, it will - be converted to a string. - - Returns: - any: The value associated with the key, or None if the key does - not exist. - """ - key = str(key) if not isinstance(key, str) else key - return self.db.get(key) - - def all(self): - """ - Get a list of all keys in the database. 
- - Returns: - list: A list of all keys. - """ - return list(self.db.keys()) - diff --git a/pkldb/pkldb_tests.py b/pkldb/pkldb_tests.py deleted file mode 100644 index 6069469..0000000 --- a/pkldb/pkldb_tests.py +++ /dev/null @@ -1,60 +0,0 @@ -import time -import unittest -import signal -from pkldb import pkldb # Adjust the import path if needed -import tempfile -import os - -class TestPkldbLargeDataset(unittest.TestCase): - def setUp(self): - """Set up a temporary pkldb instance for testing.""" - self.temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".db") - self.db = pkldb(self.temp_file.name, auto_dump=False) - - def tearDown(self): - """Clean up after tests.""" - if hasattr(self.db, "close"): - self.db.close() - if os.path.exists(self.temp_file.name): - os.remove(self.temp_file.name) - - def _timeout_handler(self, signum, frame): - raise TimeoutError("Test exceeded the timeout duration") - - def test_retrieve_before_dump(self): - """Stress test: Insert and retrieve a large number of key-value pairs, then dump.""" - timeout_duration = 600 # Timeout in seconds (10 minutes) - - # Set a signal-based timeout - signal.signal(signal.SIGALRM, self._timeout_handler) - signal.alarm(timeout_duration) - - try: - num_docs = 1_000_000 - - # Measure memory loading time - start_time = time.time() - for i in range(num_docs): - self.db.set(f"key{i}", f"value{i}") - mem_time = time.time() - mem_duration = mem_time - start_time - print(f"{num_docs} stored in memory in {mem_duration:.2f} seconds") - - # Measure retrieval performance before dumping - start_time = time.time() - retrieved_docs = [self.db.get(f"key{i}") for i in range(num_docs)] - retrieval_time = time.time() - start_time - print(f"Retrieved {num_docs} key-value pairs in {retrieval_time:.2f} seconds") - - # Measure dump performance - start_time = time.time() - self.db.dump() - dump_time = time.time() - start_time - print(f"Dumped {num_docs} key-value pairs to disk in {dump_time:.2f} seconds") - - finally: - 
signal.alarm(0) # Cancel the alarm after the test - -if __name__ == "__main__": - unittest.main() - diff --git a/setup.py b/setup.py index a129323..736dd50 100644 --- a/setup.py +++ b/setup.py @@ -1,10 +1,9 @@ - """ pickleDB -------- -pickleDB is lightweight, fast, and simple database based on Python's own -json module. And it's BSD licensed! +pickleDB is a lightweight, fast, and simple database based on the orjson module. And it's BSD licensed! + pickleDB is Fun ``````````````` @@ -13,7 +12,7 @@ >>> import pickledb - >>> db = pickledb.load('test.db', False) + >>> db = pickledb.load('test.db') >>> db.set('key', 'value') @@ -31,33 +30,31 @@ $ pip install pickledb + Links ````` -* `website `_ -* `documentation `_ -* `pypi `_ -* `github repo `_ +* `Website `_ +* `Documentation `_ +* `PyPI `_ +* `Github Repo `_ -Latest Release Notes (version: 0.9) -``````````````````````````````````` -* Now load() uses *'rt'* mode instead of 'rb' (0.9.2) -* Change lrem(name) to *lremlist(name)* (0.9) -* Add *lremvalue(name, value)* (0.9) -* Add load() option to use sigterm handler or not (0.9) -* All *keys* must now be strings (0.8) -* All *names* for lists must now be strings (0.8) -* All *names* for dicts must now be strings (0.8) -* The get(key) function now returns *False* instead of None if there is no key (0.8) -* Switched to Python's built in json module from simplejson (0.8.1) +Key Improvements in Version 1.0 +``````````````````````````````` + +pickleDB 1.0 is a reimagined version designed for speed, simplicity, and reliability. This version is NOT backwards compatible. Key changes include: +* **Atomic Saves**: Ensures data integrity during writes, eliminating potential corruption issues. +* **Faster Serialization**: Switched to `orjson` for significantly improved speed. +* **Streamlined API**: Removed legacy methods (e.g., `ladd`, `dmerge`) in favor of native Python operations. +* **Unified Handling of Data Types**: Treats all Python-native types (lists, dicts, etc.) 
as first-class citizens. +* **Explicit Saves**: The `auto_save` feature was removed to provide users greater control and optimize performance. -""" -from distutils.core import setup +""" setup(name="pickleDB", - version="0.9.3", + version="1.0", description="A lightweight and simple database using json.", long_description=__doc__, author="Harrison Erd", @@ -69,5 +66,7 @@ "License :: OSI Approved :: BSD License", "Intended Audience :: Developers", "Topic :: Database" ], - py_modules=['pickledb'],) + py_modules=['pickledb'], + install_requires=['orjson'], +) diff --git a/tests.py b/tests.py index ed210d6..c9785f5 100644 --- a/tests.py +++ b/tests.py @@ -1,115 +1,126 @@ - import unittest import os import time -from pickledb import load +import signal +from pickledb import PickleDB # Adjust the import path if needed -class TestPickleDBEnhanced(unittest.TestCase): +class TestPickleDB(unittest.TestCase): def setUp(self): - self.db_file = "test_db.json" - self.db = load(self.db_file, auto_dump=True, enable_ttl=True) + """Set up a PickleDB instance with a real file.""" + self.test_file = "test_pickledb.json" + self.db = PickleDB(self.test_file, auto_dump=False) def tearDown(self): - if os.path.exists(self.db_file): - os.remove(self.db_file) - if os.path.exists(f"{self.db_file}.gz"): - os.remove(f"{self.db_file}.gz") - - # Enhanced List Features - def test_lsort(self): - self.db.lcreate("test_list") - self.db.ladd("test_list", 3) - self.db.ladd("test_list", 1) - self.db.ladd("test_list", 2) - self.assertEqual(self.db.lsort("test_list"), [1, 2, 3]) - self.assertEqual(self.db.lsort("test_list", reverse=True), [3, 2, 1]) - - def test_lremove(self): - self.db.lcreate("test_list") - self.db.ladd("test_list", "item1") - self.db.ladd("test_list", "item2") - self.assertTrue(self.db.lremove("test_list", "item1")) - self.assertEqual(self.db.lgetall("test_list"), ["item2"]) - self.assertFalse(self.db.lremove("test_list", "nonexistent")) - - def test_lgetrange(self): - 
self.db.lcreate("test_list") - self.db.ladd("test_list", "a") - self.db.ladd("test_list", "b") - self.db.ladd("test_list", "c") - self.assertEqual(self.db.lgetrange("test_list", 0, 2), ["a", "b"]) - - def test_llen(self): - self.db.lcreate("test_list") - self.db.ladd("test_list", "a") - self.db.ladd("test_list", "b") - self.assertEqual(self.db.llen("test_list"), 2) - - # Enhanced Dictionary Features - def test_dremove(self): - self.db.dcreate("test_dict") - self.db.dadd("test_dict", "key1", "value1") - self.assertTrue(self.db.dremove("test_dict", "key1")) - self.assertFalse(self.db.dremove("test_dict", "key2")) - - def test_dmerge(self): - self.db.dcreate("test_dict") - self.db.dadd("test_dict", "key1", "value1") - self.db.dmerge("test_dict", {"key2": "value2", "key3": "value3"}) - self.assertEqual(self.db.dgetall("test_dict"), { - "key1": "value1", - "key2": "value2", - "key3": "value3" - }) - - def test_dkeys(self): - self.db.dcreate("test_dict") - self.db.dadd("test_dict", "key1", "value1") - self.db.dadd("test_dict", "key2", "value2") - self.assertEqual(set(self.db.dkeys("test_dict")), {"key1", "key2"}) - - def test_dvalues(self): - self.db.dcreate("test_dict") - self.db.dadd("test_dict", "key1", "value1") - self.db.dadd("test_dict", "key2", "value2") - self.assertEqual(set(self.db.dvalues("test_dict")), {"value1", "value2"}) - - # Additional Tests - def test_persistence(self): + """Clean up after tests.""" + if os.path.exists(self.test_file): + os.remove(self.test_file) + + def _timeout_handler(self, signum, frame): + """Handle timeouts for stress tests.""" + raise TimeoutError("Test exceeded the timeout duration") + + # Original Stress Test + def test_stress_operation(self): + """Stress test: Insert and retrieve a large number of key-value pairs, then dump.""" + timeout_duration = 600 # Timeout in seconds (10 minutes) + + # Set a signal-based timeout + signal.signal(signal.SIGALRM, self._timeout_handler) + signal.alarm(timeout_duration) + + try: + num_docs = 
20_000_000 + + # Measure memory loading time + start_time = time.time() + for i in range(num_docs): + self.db.set(f"key{i}", f"value{i}") + mem_time = time.time() + mem_duration = mem_time - start_time + print(f"\n{num_docs} stored in memory in {mem_duration:.2f} seconds") + + # Measure retrieval performance before dumping + start_time = time.time() + retrieved_docs = [self.db.get(f"key{i}") for i in range(num_docs)] + retrieval_time = time.time() - start_time + print(f"Retrieved {num_docs} key-value pairs in {retrieval_time:.2f} seconds") + + # Measure dump performance + start_time = time.time() + self.db.dump() + dump_time = time.time() - start_time + print(f"Dumped {num_docs} key-value pairs to disk in {dump_time:.2f} seconds") + + finally: + signal.alarm(0) # Cancel the alarm after the test + + # Functional Tests + def test_set_and_get(self): + """Test setting and retrieving a key-value pair.""" self.db.set("key1", "value1") - del self.db - db = load(self.db_file, auto_dump=True) - self.assertEqual(db.get("key1"), "value1") - - def test_invalid_ladd(self): - with self.assertRaises(TypeError): - self.db.ladd("nonexistent_list", "item") + self.assertEqual(self.db.get("key1"), "value1") - def test_invalid_dadd(self): - with self.assertRaises(TypeError): - self.db.dadd("nonexistent_dict", "key", "value") + def test_get_nonexistent_key(self): + """Test retrieving a key that does not exist.""" + self.assertIsNone(self.db.get("nonexistent")) - def test_compress(self): + def test_remove_key(self): + """Test removing a key-value pair.""" self.db.set("key1", "value1") - self.assertTrue(self.db.compress()) - self.assertTrue(os.path.exists(f"{self.db_file}.gz")) - - def test_ttl_expiry(self): - self.db.set("key1", "value1", ttl=1) - time.sleep(2) + self.assertTrue(self.db.remove("key1")) self.assertIsNone(self.db.get("key1")) - def test_clear(self): + def test_remove_nonexistent_key(self): + """Test removing a key that does not exist.""" + 
self.assertFalse(self.db.remove("nonexistent")) + + def test_purge(self): + """Test purging all keys and values.""" self.db.set("key1", "value1") self.db.set("key2", "value2") - self.db.clear() - self.assertEqual(len(self.db.getall()), 0) + self.db.purge() + self.assertEqual(self.db.all(), []) - def test_deldb(self): + def test_all_keys(self): + """Test retrieving all keys.""" self.db.set("key1", "value1") - self.db.deldb() - self.assertFalse(os.path.exists(self.db_file)) + self.db.set("key2", "value2") + self.assertListEqual(sorted(self.db.all()), ["key1", "key2"]) + + def test_dump_and_reload(self): + """Test dumping the database to disk and reloading it.""" + self.db.set("key1", "value1") + self.db.dump() + reloaded_db = PickleDB(self.test_file, auto_dump=False) + self.assertEqual(reloaded_db.get("key1"), "value1") + + def test_invalid_file_loading(self): + """Test initializing a database with a corrupt file.""" + with open(self.test_file, 'w') as f: + f.write("corrupt data") + db = PickleDB(self.test_file, auto_dump=False) + self.assertEqual(db.all(), []) + + def test_auto_dump(self): + """Test the auto-dump functionality.""" + db = PickleDB(self.test_file, auto_dump=True) + db.set("key1", "value1") + reloaded_db = PickleDB(self.test_file, auto_dump=False) + self.assertEqual(reloaded_db.get("key1"), "value1") + + def test_set_non_string_key(self): + """Test setting a non-string key.""" + self.db.set(123, "value123") + self.assertEqual(self.db.get("123"), "value123") + + def test_remove_non_string_key(self): + """Test removing a key that was stored as a non-string key.""" + self.db.set(123, "value123") + self.assertTrue(self.db.remove(123)) + self.assertIsNone(self.db.get("123")) + if __name__ == "__main__": unittest.main() +