diff --git a/src/powermem/__init__.py b/src/powermem/__init__.py index 2347eee..5f19b78 100644 --- a/src/powermem/__init__.py +++ b/src/powermem/__init__.py @@ -63,8 +63,142 @@ def from_config(config: Any = None, **kwargs): Create Memory instance from configuration Args: - config: Configuration dictionary - **kwargs: Additional parameters + config: Optional configuration dictionary. If None, auto-loads from .env file. + The configuration dictionary can contain the following fields: + + **Required/Common Fields:** + + - **llm** (Dict[str, Any]): LLM provider configuration + - provider (str): LLM provider name (e.g., 'openai', 'qwen', 'ollama', 'anthropic', 'gemini', 'deepseek', 'vllm', 'langchain') + - config (Dict[str, Any]): Provider-specific configuration + - api_key (str, optional): API key for the LLM provider + - model (str, optional): Model name (e.g., 'gpt-4o-mini', 'qwen-plus') + - temperature (float, optional): Sampling temperature (default: 0.7) + - max_tokens (int, optional): Maximum tokens to generate (default: 1000) + - top_p (float, optional): Top-p sampling parameter + - top_k (int, optional): Top-k sampling parameter + - openai_base_url (str, optional): Custom base URL for OpenAI-compatible APIs + - dashscope_base_url (str, optional): Custom base URL for Qwen/DashScope + - enable_search (bool, optional): Enable web search for Qwen + + - **embedder** (Dict[str, Any]): Embedding model configuration + - provider (str): Embedding provider name (e.g., 'openai', 'qwen', 'ollama', 'huggingface', 'azure_openai', 'gemini', 'vertexai', 'together', 'lmstudio', 'langchain', 'aws_bedrock') + - config (Dict[str, Any]): Provider-specific configuration + - api_key (str, optional): API key for the embedding provider + - model (str, optional): Embedding model name (e.g., 'text-embedding-v4', 'text-embedding-ada-002') + - embedding_dims (int, optional): Embedding dimensions (default: 1536) + - ollama_base_url (str, optional): Base URL for Ollama + - openai_base_url (str, optional): Base URL for OpenAI-compatible APIs + - huggingface_base_url (str, optional): Base URL for HuggingFace + - model_kwargs (Dict, optional): Additional model arguments for HuggingFace + - azure_kwargs (Dict, optional): Azure-specific configuration + - vertex_credentials_json (str, optional): Path to Vertex AI credentials JSON + - lmstudio_base_url (str, optional): Base URL for LM Studio + - aws_access_key_id (str, optional): AWS access key for Bedrock + - aws_secret_access_key (str, optional): AWS secret key for Bedrock + - aws_region (str, optional): AWS region for Bedrock + + - **vector_store** (Dict[str, Any]): Vector store configuration + - provider (str): Vector store provider (e.g., 'oceanbase', 'pgvector', 'sqlite', 'postgres') + - config (Dict[str, Any]): Provider-specific configuration + For OceanBase: + - collection_name (str): Collection/table name + - connection_args (Dict): Database connection arguments + - host (str): Database host + - port (str/int): Database port + - user (str): Database user + - password (str): Database password + - db_name (str): Database name + - vidx_metric_type (str): Vector index metric type (e.g., 'cosine', 'l2') + - index_type (str): Index type (e.g., 'IVF_FLAT') + - embedding_model_dims (int): Embedding dimensions + - primary_field (str): Primary key field name + - vector_field (str): Vector field name + - text_field (str): Text field name + - metadata_field (str): Metadata field name + - vidx_name (str): Vector index name + For PostgreSQL (pgvector): + - collection_name (str): Collection/table name + - dbname (str): Database name + - host (str): Database host + - port (int): Database port + - user (str): Database user + - password (str): Database password + - embedding_model_dims (int): Embedding dimensions + - diskann (bool): Enable DiskANN index + - hnsw (bool): Enable HNSW index + For SQLite: + - database_path (str): Path to SQLite database file + - collection_name (str): Collection/table name + - enable_wal (bool): Enable Write-Ahead Logging + - timeout (int): Connection timeout in seconds + + **Optional Fields:** + + - **graph_store** (Dict[str, Any], optional): Graph store configuration + - enabled (bool): Whether to enable graph store (default: False) + - provider (str): Graph store provider (e.g., 'oceanbase') + - config (Dict[str, Any]): Provider-specific configuration + - llm (Dict[str, Any], optional): LLM configuration for graph queries + - custom_prompt (str, optional): Custom prompt for entity extraction + - custom_extract_relations_prompt (str, optional): Custom prompt for relation extraction + - custom_update_graph_prompt (str, optional): Custom prompt for graph updates + - custom_delete_relations_prompt (str, optional): Custom prompt for deleting relations + + - **reranker** (Dict[str, Any], optional): Reranker configuration + - enabled (bool): Whether to enable reranker (default: False) + - provider (str): Reranker provider (e.g., 'qwen', 'cohere') + - config (Dict[str, Any]): Provider-specific configuration + - model (str, optional): Reranker model name + - api_key (str, optional): API key for reranker + + - **intelligent_memory** (Dict[str, Any], optional): Intelligent memory management configuration (Ebbinghaus algorithm) + - enabled (bool): Whether to enable intelligent memory (default: True) + - initial_retention (float): Initial retention strength for new memories (default: 1.0) + - decay_rate (float): Rate at which memories decay over time (default: 0.1) + - reinforcement_factor (float): Factor by which memories are reinforced when accessed (default: 0.3) + - working_threshold (float): Threshold for working memory classification (default: 0.3) + - short_term_threshold (float): Threshold for short-term memory classification (default: 0.6) + - long_term_threshold (float): Threshold for long-term memory classification (default: 0.8) + + - **agent_memory** (Dict[str, Any], optional): Agent memory management configuration + - enabled (bool): Whether to enable agent memory (default: True) + - mode (str): Agent memory mode: 'multi_agent', 'multi_user', 'hybrid', or 'auto' (default: 'auto') + - default_scope (str): Default scope for memories: 'private', 'agent_group', 'user_group', 'public', 'restricted' (default: 'private') + - default_privacy_level (str): Default privacy level: 'standard', 'sensitive', 'confidential' (default: 'standard') + - default_collaboration_level (str): Default collaboration level: 'isolated', 'collaborative' (default: 'isolated') + - default_access_permission (str): Default access permission: 'read', 'write', 'delete', 'admin' (default: 'read') + - enable_collaboration (bool): Whether to enable collaboration features (default: True) + + - **telemetry** (Dict[str, Any], optional): Telemetry and monitoring configuration + - enable_telemetry (bool): Whether to enable telemetry (default: False) + - telemetry_endpoint (str): Endpoint URL for telemetry data (default: 'https://telemetry.powermem.ai') + - telemetry_api_key (str, optional): API key for telemetry service + - batch_size (int): Number of events to batch before sending (default: 100) + - flush_interval (int): Interval in seconds to flush telemetry data (default: 30) + + - **audit** (Dict[str, Any], optional): Audit logging configuration + - enabled (bool): Whether to enable audit logging (default: True) + - log_file (str): Path to the audit log file (default: './logs/audit.log') + - log_level (str): Logging level for audit logs (default: 'INFO') + - retention_days (int): Number of days to retain audit logs (default: 90) + + - **logging** (Dict[str, Any], optional): Application logging configuration + - level (str): Logging level (default: 'DEBUG') + - format (str): Log message format (default: '%(asctime)s - %(name)s - %(levelname)s - %(message)s') + - file (str): Path to the log file (default: './logs/powermem.log') + + - **version** (str, optional): API version (default: 'v1.1') + + - **custom_fact_extraction_prompt** (str, optional): Custom prompt for fact extraction + + - **custom_update_memory_prompt** (str, optional): Custom prompt for memory updates + + - **custom_importance_evaluation_prompt** (str, optional): Custom prompt for importance evaluation + + - **audio_llm** (Dict[str, Any], optional): Audio language model configuration (same structure as 'llm') + + **kwargs: Additional parameters to pass to Memory constructor Returns: Memory instance @@ -73,13 +207,45 @@ def from_config(config: Any = None, **kwargs): ```python from powermem import from_config + # Full configuration example memory = from_config({ - "llm": {"provider": "openai", "config": {"api_key": "..."}}, - "embedder": {"provider": "openai", "config": {"api_key": "..."}}, - "vector_store": {"provider": "chroma", "config": {...}}, + "llm": { + "provider": "openai", + "config": { + "api_key": "...", + "model": "gpt-4o-mini", + "temperature": 0.7 + } + }, + "embedder": { + "provider": "openai", + "config": { + "api_key": "...", + "model": "text-embedding-ada-002", + "embedding_dims": 1536 + } + }, + "vector_store": { + "provider": "oceanbase", + "config": { + "collection_name": "memories", + "connection_args": { + "host": "127.0.0.1", + "port": "2881", + "user": "root@sys", + "password": "password", + "db_name": "powermem" + }, + "embedding_model_dims": 1536 + } + }, + "intelligent_memory": { + "enabled": True, + "decay_rate": 0.1 + } }) - # Or auto-load from .env + # Or auto-load from .env file memory = from_config() ``` """ diff --git a/src/powermem/core/async_memory.py b/src/powermem/core/async_memory.py index 866460b..6870222 100644 --- a/src/powermem/core/async_memory.py +++ b/src/powermem/core/async_memory.py @@ -415,7 +415,24 @@ async def add( prompt: Optional[str] = None, infer: bool = True, ) -> Dict[str, Any]: - """Add a new memory asynchronously with optional intelligent processing.""" + """Add a new memory asynchronously with optional intelligent processing. + + Returns: + Dict[str, Any]: A dictionary containing the add operation results with the following structure: + - "results" (List[Dict]): List of memory operation results, where each result contains: + - "id" (int): Memory ID + - "memory" (str): The memory content + - "event" (str): Operation event type (e.g., "ADD", "UPDATE", "DELETE") + - "user_id" (str, optional): User ID associated with the memory + - "agent_id" (str, optional): Agent ID associated with the memory + - "run_id" (str, optional): Run ID associated with the memory + - "metadata" (Dict, optional): Metadata dictionary + - "created_at" (str, optional): Creation timestamp in ISO format + - "previous_memory" (str, optional): Previous memory content (for UPDATE events) + - "relations" (Dict, optional): Graph relations if graph store is enabled, containing: + - "deleted_entities" (List): List of deleted graph entities + - "added_entities" (List): List of added graph entities + """ try: # Handle messages parameter if messages is None: @@ -942,7 +959,22 @@ async def search( limit: int = 30, threshold: Optional[float] = None, ) -> Dict[str, Any]: - """Search for memories asynchronously.""" + """Search for memories asynchronously. + + Returns: + Dict[str, Any]: A dictionary containing search results with the following structure: + - "results" (List[Dict]): List of memory search results, where each result contains: + - "memory" (str): The memory content + - "metadata" (Dict): Metadata associated with the memory + - "score" (float): Similarity score for the result + - "id" (int, optional): Memory ID + - "created_at" (datetime, optional): Creation timestamp + - "updated_at" (datetime, optional): Update timestamp + - "user_id" (str, optional): User ID + - "agent_id" (str, optional): Agent ID + - "run_id" (str, optional): Run ID + - "relations" (List, optional): Graph relations if graph store is enabled + """ try: # Select embedding service based on filters (for sub-store routing) embedding_service = self._get_embedding_service(filters) @@ -1039,7 +1071,21 @@ async def get( user_id: Optional[str] = None, agent_id: Optional[str] = None, ) -> Optional[Dict[str, Any]]: - """Get a specific memory by ID asynchronously.""" + """Get a specific memory by ID asynchronously. + + Returns: + Optional[Dict[str, Any]]: A dictionary containing the memory data if found, None otherwise. + The dictionary contains the following fields: + - "id" (int): Memory ID + - "content" (str): The memory content + - "user_id" (str, optional): User ID associated with the memory + - "agent_id" (str, optional): Agent ID associated with the memory + - "run_id" (str, optional): Run ID associated with the memory + - "metadata" (Dict): Metadata dictionary associated with the memory + - "created_at" (datetime, optional): Creation timestamp + - "updated_at" (datetime, optional): Update timestamp + Returns None if the memory is not found or access is denied. + """ try: result = await self.storage.get_memory_async(memory_id, user_id, agent_id) @@ -1074,7 +1120,23 @@ async def update( agent_id: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, ) -> Dict[str, Any]: - """Update an existing memory asynchronously.""" + """Update an existing memory asynchronously. + + Returns: + Dict[str, Any]: A dictionary containing the updated memory data if successful, None if memory not found or access denied. + The dictionary contains the following fields: + - "id" (int): Memory ID + - "content" (str): The updated memory content (stored as "data" in payload) + - "user_id" (str, optional): User ID associated with the memory + - "agent_id" (str, optional): Agent ID associated with the memory + - "run_id" (str, optional): Run ID associated with the memory + - "metadata" (Dict): Metadata dictionary associated with the memory + - "created_at" (str, optional): Creation timestamp in ISO format + - "updated_at" (str): Update timestamp in ISO format + - "hash" (str): Content hash for deduplication + - "category" (str, optional): Category of the memory + Returns None if the memory is not found or access is denied. + """ try: # Validate content is not empty if not content or not content.strip(): @@ -1177,7 +1239,21 @@ async def get_all( offset: int = 0, filters: Optional[Dict[str, Any]] = None, ) -> Dict[str, List[Dict[str, Any]]]: - """Get all memories with optional filtering asynchronously.""" + """Get all memories with optional filtering asynchronously. + + Returns: + Dict[str, List[Dict[str, Any]]]: A dictionary containing all memories with the following structure: + - "results" (List[Dict]): List of memory dictionaries, where each memory contains: + - "id" (int): Memory ID + - "content" (str): The memory content + - "user_id" (str, optional): User ID associated with the memory + - "agent_id" (str, optional): Agent ID associated with the memory + - "run_id" (str, optional): Run ID associated with the memory + - "metadata" (Dict): Metadata dictionary associated with the memory + - "created_at" (datetime or str, optional): Creation timestamp + - "updated_at" (datetime or str, optional): Update timestamp + - "relations" (List[Dict], optional): Graph relations if graph store is enabled + """ try: results = await self.storage.get_all_memories_async(user_id, agent_id, run_id, limit, offset) @@ -1449,7 +1525,9 @@ async def migrate_all_sub_stores(self, delete_source: bool = True) -> Dict[str, delete_source: Whether to delete source data Returns: - Migration record count for each sub store {store_name: count} + Dict[str, int]: A dictionary mapping sub store names to the number of migrated records. + Each key is a sub store name (str), and each value is the count of migrated records (int). + If migration fails for a sub store, its count will be 0. """ results = {} for index, sub_config in enumerate(self.sub_stores_config): diff --git a/src/powermem/core/memory.py b/src/powermem/core/memory.py index 69f2aab..e6647cf 100644 --- a/src/powermem/core/memory.py +++ b/src/powermem/core/memory.py @@ -483,7 +483,24 @@ def add( prompt: Optional[str] = None, infer: bool = True, ) -> Dict[str, Any]: - """Add a new memory with optional intelligent processing.""" + """Add a new memory with optional intelligent processing. + + Returns: + Dict[str, Any]: A dictionary containing the add operation results with the following structure: + - "results" (List[Dict]): List of memory operation results, where each result contains: + - "id" (int): Memory ID + - "memory" (str): The memory content + - "event" (str): Operation event type (e.g., "ADD", "UPDATE", "DELETE") + - "user_id" (str, optional): User ID associated with the memory + - "agent_id" (str, optional): Agent ID associated with the memory + - "run_id" (str, optional): Run ID associated with the memory + - "metadata" (Dict, optional): Metadata dictionary + - "created_at" (str, optional): Creation timestamp in ISO format + - "previous_memory" (str, optional): Previous memory content (for UPDATE events) + - "relations" (Dict, optional): Graph relations if graph store is enabled, containing: + - "deleted_entities" (List): List of deleted graph entities + - "added_entities" (List): List of added graph entities + """ try: # Handle messages parameter if messages is None: @@ -538,7 +555,23 @@ def _simple_add( memory_type: Optional[str] = None, prompt: Optional[str] = None, ) -> Dict[str, Any]: - """Simple add mode: direct storage without intelligence.""" + """Simple add mode: direct storage without intelligence. + + Returns: + Dict[str, Any]: A dictionary containing the add operation results with the following structure: + - "results" (List[Dict]): List containing a single memory operation result with: + - "id" (int): Memory ID + - "memory" (str): The memory content + - "event" (str): Operation event type ("ADD") + - "user_id" (str, optional): User ID associated with the memory + - "agent_id" (str, optional): Agent ID associated with the memory + - "run_id" (str, optional): Run ID associated with the memory + - "metadata" (Dict, optional): Metadata dictionary + - "created_at" (str): Creation timestamp in ISO format + - "relations" (Dict, optional): Graph relations if graph store is enabled, containing: + - "deleted_entities" (List): List of deleted graph entities + - "added_entities" (List): List of added graph entities + """ # Parse messages into content if isinstance(messages, str): content = messages @@ -1004,7 +1037,22 @@ def search( limit: int = 30, threshold: Optional[float] = None, ) -> Dict[str, Any]: - """Search for memories.""" + """Search for memories. + + Returns: + Dict[str, Any]: A dictionary containing search results with the following structure: + - "results" (List[Dict]): List of memory search results, where each result contains: + - "memory" (str): The memory content + - "metadata" (Dict): Metadata associated with the memory + - "score" (float): Similarity score for the result + - "id" (int, optional): Memory ID + - "created_at" (datetime, optional): Creation timestamp + - "updated_at" (datetime, optional): Update timestamp + - "user_id" (str, optional): User ID + - "agent_id" (str, optional): Agent ID + - "run_id" (str, optional): Run ID + - "relations" (List, optional): Graph relations if graph store is enabled + """ try: # Select embedding service based on filters (for sub-store routing) embedding_service = self._get_embedding_service(filters) @@ -1101,7 +1149,21 @@ def get( user_id: Optional[str] = None, agent_id: Optional[str] = None, ) -> Optional[Dict[str, Any]]: - """Get a specific memory by ID.""" + """Get a specific memory by ID. + + Returns: + Optional[Dict[str, Any]]: A dictionary containing the memory data if found, None otherwise. + The dictionary contains the following fields: + - "id" (int): Memory ID + - "content" (str): The memory content + - "user_id" (str, optional): User ID associated with the memory + - "agent_id" (str, optional): Agent ID associated with the memory + - "run_id" (str, optional): Run ID associated with the memory + - "metadata" (Dict): Metadata dictionary associated with the memory + - "created_at" (datetime, optional): Creation timestamp + - "updated_at" (datetime, optional): Update timestamp + Returns None if the memory is not found or access is denied. + """ try: result = self.storage.get_memory(memory_id, user_id, agent_id) @@ -1138,7 +1200,23 @@ def update( agent_id: Optional[str] = None, metadata: Optional[Dict[str, Any]] = None, ) -> Dict[str, Any]: - """Update an existing memory.""" + """Update an existing memory. + + Returns: + Dict[str, Any]: A dictionary containing the updated memory data if successful, None if memory not found or access denied. + The dictionary contains the following fields: + - "id" (int): Memory ID + - "content" (str): The updated memory content (stored as "data" in payload) + - "user_id" (str, optional): User ID associated with the memory + - "agent_id" (str, optional): Agent ID associated with the memory + - "run_id" (str, optional): Run ID associated with the memory + - "metadata" (Dict): Metadata dictionary associated with the memory + - "created_at" (str, optional): Creation timestamp in ISO format + - "updated_at" (str): Update timestamp in ISO format + - "hash" (str): Content hash for deduplication + - "category" (str, optional): Category of the memory + Returns None if the memory is not found or access is denied. + """ try: # Validate content is not empty if not content or not content.strip(): @@ -1275,7 +1353,21 @@ def get_all( offset: int = 0, filters: Optional[Dict[str, Any]] = None, ) -> dict[str, list[dict[str, Any]]]: - """Get all memories with optional filtering.""" + """Get all memories with optional filtering. + + Returns: + dict[str, list[dict[str, Any]]]: A dictionary containing all memories with the following structure: + - "results" (List[Dict]): List of memory dictionaries, where each memory contains: + - "id" (int): Memory ID + - "content" (str): The memory content + - "user_id" (str, optional): User ID associated with the memory + - "agent_id" (str, optional): Agent ID associated with the memory + - "run_id" (str, optional): Run ID associated with the memory + - "metadata" (Dict): Metadata dictionary associated with the memory + - "created_at" (datetime or str, optional): Creation timestamp + - "updated_at" (datetime or str, optional): Update timestamp + - "relations" (List[Dict], optional): Graph relations if graph store is enabled + """ try: results = self.storage.get_all_memories(user_id, agent_id, run_id, limit, offset) @@ -1523,7 +1615,9 @@ def migrate_all_sub_stores(self, delete_source: bool = True) -> Dict[str, int]: delete_source: Whether to delete source data Returns: - Migration record count for each sub store {store_name: count} + Dict[str, int]: A dictionary mapping sub store names to the number of migrated records. + Each key is a sub store name (str), and each value is the count of migrated records (int). + If migration fails for a sub store, its count will be 0. """ results = {} for index, sub_config in enumerate(self.sub_stores_config):