diff --git a/evaluation/README.md b/evaluation/README.md index ba8c7a0cc..8683c60b2 100644 --- a/evaluation/README.md +++ b/evaluation/README.md @@ -84,4 +84,4 @@ get `questions_32k.csv` and `shared_contexts_32k.jsonl` from https://huggingface # Specify the model and memory backend you want to use (e.g., mem0, zep, etc.) # If you want to use MIRIX, edit the the configuration in ./scripts/personamem/config.yaml ./scripts/run_pm_eval.sh -``` \ No newline at end of file +``` diff --git a/evaluation/scripts/PrefEval/pref_mem0.py b/evaluation/scripts/PrefEval/pref_mem0.py index 214068567..300e0ede3 100644 --- a/evaluation/scripts/PrefEval/pref_mem0.py +++ b/evaluation/scripts/PrefEval/pref_mem0.py @@ -56,7 +56,7 @@ def add_memory_for_line( for idx, _ in enumerate(conversation[::2]): msg_idx = idx * 2 - record_id = f"{lib}_user_pref_eval_{i}_{version}_{str(msg_idx)}" + record_id = f"{lib}_user_pref_eval_{i}_{version}_{msg_idx!s}" timestamp_add = int(time.time() * 100) if record_id not in success_records: diff --git a/evaluation/scripts/PrefEval/pref_memobase.py b/evaluation/scripts/PrefEval/pref_memobase.py index e99b10520..776642657 100644 --- a/evaluation/scripts/PrefEval/pref_memobase.py +++ b/evaluation/scripts/PrefEval/pref_memobase.py @@ -12,6 +12,7 @@ from openai import OpenAI from tqdm import tqdm + ROOT_DIR = os.path.dirname( os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) ) @@ -68,7 +69,7 @@ def add_memory_for_line( ) for idx, _ in enumerate(conversation[::2]): msg_idx = idx * 2 - record_id = f"{lib}_user_pref_eval_{i}_{version}_{str(msg_idx)}" + record_id = f"{lib}_user_pref_eval_{i}_{version}_{msg_idx!s}" if record_id not in success_records: mem_client.add(messages=conversation[msg_idx : msg_idx + 2], user_id=user_id) diff --git a/evaluation/scripts/PrefEval/pref_memos.py b/evaluation/scripts/PrefEval/pref_memos.py index 4a21e3af0..bbe1788b5 100644 --- a/evaluation/scripts/PrefEval/pref_memos.py +++ b/evaluation/scripts/PrefEval/pref_memos.py @@ -12,6 +12,7 @@ from openai import OpenAI from tqdm import tqdm + ROOT_DIR = os.path.dirname( os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) ) @@ -49,7 +50,7 @@ def add_memory_for_line( for idx, _ in enumerate(conversation[::2]): msg_idx = idx * 2 - record_id = f"{lib}_user_pref_eval_{i}_{version}_{str(msg_idx)}" + record_id = f"{lib}_user_pref_eval_{i}_{version}_{msg_idx!s}" if record_id not in success_records: mem_client.add( diff --git a/evaluation/scripts/PrefEval/pref_memu.py b/evaluation/scripts/PrefEval/pref_memu.py index 4c37db7b7..00c411eb7 100644 --- a/evaluation/scripts/PrefEval/pref_memu.py +++ b/evaluation/scripts/PrefEval/pref_memu.py @@ -14,6 +14,7 @@ from openai import OpenAI from tqdm import tqdm + ROOT_DIR = os.path.dirname( os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) ) @@ -56,7 +57,7 @@ def add_memory_for_line( for idx, _ in enumerate(conversation[::2]): msg_idx = idx * 2 - record_id = f"{lib}_user_pref_eval_{i}_{version}_{str(msg_idx)}" + record_id = f"{lib}_user_pref_eval_{i}_{version}_{msg_idx!s}" if record_id not in success_records: mem_client.add( diff --git a/evaluation/scripts/PrefEval/pref_supermemory.py b/evaluation/scripts/PrefEval/pref_supermemory.py index 68963e2af..7386bc462 100644 --- a/evaluation/scripts/PrefEval/pref_supermemory.py +++ b/evaluation/scripts/PrefEval/pref_supermemory.py @@ -12,6 +12,7 @@ from openai import OpenAI from tqdm import tqdm + ROOT_DIR = os.path.dirname( os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) ) @@ -54,7 +55,7 @@ def add_memory_for_line( for idx, _ in enumerate(conversation[::2]): msg_idx = idx * 2 - record_id = f"{lib}_user_pref_eval_{i}_{version}_{str(msg_idx)}" + record_id = f"{lib}_user_pref_eval_{i}_{version}_{msg_idx!s}" if record_id not in success_records: mem_client.add( diff --git a/evaluation/scripts/PrefEval/pref_zep.py b/evaluation/scripts/PrefEval/pref_zep.py index be98c6ba9..8a4d50558 100644 --- a/evaluation/scripts/PrefEval/pref_zep.py +++ b/evaluation/scripts/PrefEval/pref_zep.py @@ -14,6 +14,7 @@ from openai import OpenAI from tqdm import tqdm + ROOT_DIR = os.path.dirname( os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) ) @@ -56,7 +57,7 @@ def add_memory_for_line( for idx, _ in enumerate(conversation[::2]): msg_idx = idx * 2 - record_id = f"{lib}_user_pref_eval_{i}_{version}_{str(msg_idx)}" + record_id = f"{lib}_user_pref_eval_{i}_{version}_{msg_idx!s}" if record_id not in success_records: mem_client.add( diff --git a/evaluation/scripts/personamem/pm_ingestion.py b/evaluation/scripts/personamem/pm_ingestion.py index fdbf43528..b960aa157 100644 --- a/evaluation/scripts/personamem/pm_ingestion.py +++ b/evaluation/scripts/personamem/pm_ingestion.py @@ -10,6 +10,7 @@ from tqdm import tqdm + sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) @@ -171,7 +172,9 @@ def ingest_conv(row_data, context, version, conv_idx, frame, success_records, f) client = MemosApiOnlineClient() try: - ingest_session(session=context, user_id=user_id, session_id=conv_idx, frame=frame, client=client) + ingest_session( + session=context, user_id=user_id, session_id=conv_idx, frame=frame, client=client + ) print(f"โœ… Ingestion of conversation {conv_idx} completed") print("=" * 80) @@ -187,10 +190,9 @@ def main(frame, version, num_workers=2, clear=False): os.makedirs(f"results/pm/{frame}-{version}/", exist_ok=True) record_file = f"results/pm/{frame}-{version}/success_records.txt" - if clear: - if os.path.exists(record_file): - os.remove(record_file) - print("๐Ÿงน Cleared progress records") + if clear and os.path.exists(record_file): + os.remove(record_file) + print("๐Ÿงน Cleared progress records") print("\n" + "=" * 80) print(f"๐Ÿš€ PERSONAMEM INGESTION - {frame.upper()} v{version}".center(80)) @@ -205,15 +207,20 @@ def main(frame, version, num_workers=2, clear=False): success_records = set() if os.path.exists(record_file): - with open(record_file, "r") as f: - success_records = set(line.strip() for line in f) - print(f"๐Ÿ“Š Found {len(success_records)} completed conversations, {total_rows - len(success_records)} remaining") + with open(record_file) as f: + success_records = {line.strip() for line in f} + print( + f"๐Ÿ“Š Found {len(success_records)} completed conversations, {total_rows - len(success_records)} remaining" + ) start_time = datetime.now() all_data = list(load_rows_with_context(question_csv_path, context_jsonl_path)) - pending_data = [(idx, row_data, context) for idx, (row_data, context) in enumerate(all_data) - if str(idx) not in success_records] + pending_data = [ + (idx, row_data, context) + for idx, (row_data, context) in enumerate(all_data) + if str(idx) not in success_records + ] if not pending_data: print("โœ… All conversations have been processed!") @@ -232,16 +239,16 @@ def main(frame, version, num_workers=2, clear=False): conv_idx=idx, frame=frame, success_records=success_records, - f=f + f=f, ) futures.append(future) completed_count = 0 for future in tqdm( - as_completed(futures), total=len(futures), desc="Processing conversations" + as_completed(futures), total=len(futures), desc="Processing conversations" ): try: - result = future.result() + future.result() completed_count += 1 except Exception as exc: print(f"\nโŒ Conversation generated an exception: {exc}") @@ -261,13 +268,28 @@ def main(frame, version, num_workers=2, clear=False): if __name__ == "__main__": parser = argparse.ArgumentParser(description="PersonaMem Ingestion Script") - parser.add_argument("--lib", type=str, - choices=["memos-api-online", "mem0", "mem0_graph", "memos-api", "memobase", "memu", - "supermemory", "zep"], - default='memos-api') - parser.add_argument("--version", type=str, default="default", help="Version of the evaluation framework.") - parser.add_argument("--workers", type=int, default=3, help="Number of parallel workers for processing users.") + parser.add_argument( + "--lib", + type=str, + choices=[ + "memos-api-online", + "mem0", + "mem0_graph", + "memos-api", + "memobase", + "memu", + "supermemory", + "zep", + ], + default="memos-api", + ) + parser.add_argument( + "--version", type=str, default="default", help="Version of the evaluation framework." + ) + parser.add_argument( + "--workers", type=int, default=3, help="Number of parallel workers for processing users." + ) parser.add_argument("--clear", action="store_true", help="Clear progress and start fresh") args = parser.parse_args() - main(frame=args.lib, version=args.version, num_workers=args.workers, clear=args.clear) \ No newline at end of file + main(frame=args.lib, version=args.version, num_workers=args.workers, clear=args.clear) diff --git a/evaluation/scripts/personamem/pm_metric.py b/evaluation/scripts/personamem/pm_metric.py index b9d10a576..4c93ec0c6 100644 --- a/evaluation/scripts/personamem/pm_metric.py +++ b/evaluation/scripts/personamem/pm_metric.py @@ -353,12 +353,23 @@ def print_summary(results): parser.add_argument( "--lib", type=str, - choices=["zep", "mem0", "mem0_graph", "memos-api", "memos-api-online", "memobase", "memu", "supermemory"], + choices=[ + "zep", + "mem0", + "mem0_graph", + "memos-api", + "memos-api-online", + "memobase", + "memu", + "supermemory", + ], required=True, help="Memory library to evaluate", default="memos-api", ) - parser.add_argument("--version", type=str, default="default", help="Evaluation framework version") + parser.add_argument( + "--version", type=str, default="default", help="Evaluation framework version" + ) args = parser.parse_args() lib, version = args.lib, args.version diff --git a/evaluation/scripts/personamem/pm_responses.py b/evaluation/scripts/personamem/pm_responses.py index 2e41b4140..171b5af1a 100644 --- a/evaluation/scripts/personamem/pm_responses.py +++ b/evaluation/scripts/personamem/pm_responses.py @@ -10,6 +10,7 @@ from openai import OpenAI from tqdm import tqdm + sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) import re @@ -153,9 +154,9 @@ def main(frame, version, num_runs=3, num_workers=4): future_to_user_id[future] = user_id for future in tqdm( - as_completed(future_to_user_id), - total=len(future_to_user_id), - desc="๐Ÿ“ Generating responses", + as_completed(future_to_user_id), + total=len(future_to_user_id), + desc="๐Ÿ“ Generating responses", ): user_id = future_to_user_id[future] try: @@ -184,12 +185,30 @@ def main(frame, version, num_runs=3, num_workers=4): if __name__ == "__main__": parser = argparse.ArgumentParser(description="PersonaMem Response Generation Script") - parser.add_argument("--lib", type=str, - choices=["memos-api-online", "zep", "mem0", "mem0_graph", "memos-api", "memobase", "memu", - "supermemory"], default='memos-api') - parser.add_argument("--version", type=str, default="default", help="Version of the evaluation framework.") - parser.add_argument("--num_runs", type=int, default=3, help="Number of runs for LLM-as-a-Judge evaluation.") - parser.add_argument("--workers", type=int, default=10, help="Number of worker threads to use for processing.") + parser.add_argument( + "--lib", + type=str, + choices=[ + "memos-api-online", + "zep", + "mem0", + "mem0_graph", + "memos-api", + "memobase", + "memu", + "supermemory", + ], + default="memos-api", + ) + parser.add_argument( + "--version", type=str, default="default", help="Version of the evaluation framework." + ) + parser.add_argument( + "--num_runs", type=int, default=3, help="Number of runs for LLM-as-a-Judge evaluation." + ) + parser.add_argument( + "--workers", type=int, default=10, help="Number of worker threads to use for processing." + ) args = parser.parse_args() main(frame=args.lib, version=args.version, num_runs=args.num_runs, num_workers=args.workers) diff --git a/evaluation/scripts/personamem/pm_search.py b/evaluation/scripts/personamem/pm_search.py index 13ed659d2..80a65e09b 100644 --- a/evaluation/scripts/personamem/pm_search.py +++ b/evaluation/scripts/personamem/pm_search.py @@ -3,10 +3,12 @@ import json import os import sys + from collections import defaultdict from concurrent.futures import ThreadPoolExecutor, as_completed from datetime import datetime from time import time + from tqdm import tqdm @@ -232,6 +234,7 @@ def process_user(row_data, conv_idx, frame, version, top_k=20): context, duration_ms = memobase_search(client, question, user_id, top_k) elif frame == "memos-api-online": from utils.client import MemosApiOnlineClient + client = MemosApiOnlineClient() print("๐Ÿ”Œ Using memos-api-online client for search...") context, duration_ms = memos_search(client, question, user_id, top_k) @@ -253,7 +256,7 @@ def process_user(row_data, conv_idx, frame, version, top_k=20): os.makedirs(f"results/pm/{frame}-{version}/tmp", exist_ok=True) with open( - f"results/pm/{frame}-{version}/tmp/{frame}_pm_search_results_{conv_idx}.json", "w" + f"results/pm/{frame}-{version}/tmp/{frame}_pm_search_results_{conv_idx}.json", "w" ) as f: json.dump(search_results, f, indent=4) print(f"๐Ÿ’พ Search results for conversation {conv_idx} saved...") @@ -304,7 +307,7 @@ def main(frame, version, top_k=20, num_workers=2): } for future in tqdm( - as_completed(future_to_idx), total=len(future_to_idx), desc="Processing conversations" + as_completed(future_to_idx), total=len(future_to_idx), desc="Processing conversations" ): idx = future_to_idx[future] try: @@ -333,13 +336,29 @@ def main(frame, version, top_k=20, num_workers=2): if __name__ == "__main__": parser = argparse.ArgumentParser(description="PersonaMem Search Script") - parser.add_argument("--lib", type=str, - choices=["memos-api-online", "mem0", "mem0_graph", "memos-api", "memobase", "memu", - "supermemory"], - default='memos-api') - parser.add_argument("--version", type=str, default="default", help="Version of the evaluation framework.") - parser.add_argument("--top_k", type=int, default=20, help="Number of top results to retrieve from the search.") - parser.add_argument("--workers", type=int, default=3, help="Number of parallel workers for processing users.") + parser.add_argument( + "--lib", + type=str, + choices=[ + "memos-api-online", + "mem0", + "mem0_graph", + "memos-api", + "memobase", + "memu", + "supermemory", + ], + default="memos-api", + ) + parser.add_argument( + "--version", type=str, default="default", help="Version of the evaluation framework." + ) + parser.add_argument( + "--top_k", type=int, default=20, help="Number of top results to retrieve from the search." + ) + parser.add_argument( + "--workers", type=int, default=3, help="Number of parallel workers for processing users." + ) args = parser.parse_args() diff --git a/evaluation/scripts/run_prefeval_eval.sh b/evaluation/scripts/run_prefeval_eval.sh index 129382ebf..6f5f3b7b0 100755 --- a/evaluation/scripts/run_prefeval_eval.sh +++ b/evaluation/scripts/run_prefeval_eval.sh @@ -143,4 +143,4 @@ fi echo "" echo "--- PrefEval Pipeline completed successfully! ---" -echo "Final results are in $RESPONSE_FILE" \ No newline at end of file +echo "Final results are in $RESPONSE_FILE" diff --git a/src/memos/memories/textual/prefer_text_memory/adder.py b/src/memos/memories/textual/prefer_text_memory/adder.py index 052ae30c2..8d00ae81d 100644 --- a/src/memos/memories/textual/prefer_text_memory/adder.py +++ b/src/memos/memories/textual/prefer_text_memory/adder.py @@ -232,8 +232,12 @@ def _update_memory_fine( need_update = ( need_update if isinstance(need_update, bool) else need_update.lower() == "true" ) - update_item = [mem for mem in retrieved_memories if mem.id == rsp["id"]] - if need_update and update_item: + update_item = ( + [mem for mem in retrieved_memories if mem.id == rsp["id"]] + if rsp and "id" in rsp + else [] + ) + if need_update and update_item and rsp: update_vec_db_item = update_item[0] update_vec_db_item.payload[preference_type] = rsp["new_preference"] update_vec_db_item.payload["updated_at"] = vec_db_item.payload["updated_at"]