Safety Buffer (#16)

adamyodinsky · Mar 31, 2023 · 63c0edc · 63c0edc
1 parent ed05042
commit 63c0edc
Show file tree

Hide file tree

Showing 9 changed files with 93 additions and 114 deletions.
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -33,10 +33,8 @@ jobs:
       - name: lint
         run: poetry run pylint terminalgpt tests
       - name: test
-        run: |
-          filename=$(date +"%Y-%m-%d")"_test_result.xml"
-          poetry run pytest -v --junitxml=${filename}
-      # TODOLATER: publish test results
+        run: poetry run pytest
+      # TODO: publish test results
       # - name: Publish Test Results
       #   uses: EnricoMi/publish-unit-test-result-action@v2
       #   with:

diff --git a/.gitignore b/.gitignore
@@ -4,3 +4,4 @@ dist
 .vscode
 .idea
 dummy.py
+ARCH.md
diff --git a/README.md b/README.md
@@ -17,19 +17,21 @@ Whether you need help with a quick question or want to explore a complex topic,
 
 Some advantages of using TerminalGPT over the chatGPT browser-based app:
 
-1. It doesn't disconnect like the browser-based app, so you can leave it running in a terminal session on the side without losing context.
-2. It's highly available and can be used whenever you need it.
-3. It's faster with replies than the browser-based app.
-4. You can use TerminalGPT with your IDE terminal, which means you won't have to constantly switch between your browser and your IDE when you have questions.
-5. TerminalGPT's answers are tailored to your machine's operating system, distribution, and chip-set architecture.
-6. Doesn't use your conversation data for training the model (unlike the browser-based app).
-7. Your conversations are stored locally on your machine, so only you can access them.
+- It doesn't disconnect like the browser-based app, so you can leave it running in a terminal session on the side without losing context.
+- It's highly available and can be used whenever you need it.
+- It's faster with replies than the browser-based app.
+- You can use TerminalGPT with your IDE terminal, which means you won't have to constantly switch between your browser and your IDE when you have questions.
+- TerminalGPT's answers are tailored to your machine's operating system, distribution, and chip-set architecture.
+- Doesn't use your conversation data for training the model (unlike the browser-based app).
+- Your conversations are stored locally on your machine, so only you can access them.
 
 ## Pre-requisites
 
-1. Python 3.6 or higher
-2. An OpenAI Account and API key (It's free for personal use).
-[How to create OpenAI API keys](https://elephas.app/blog/how-to-create-openai-api-keys-cl5c4f21d281431po7k8fgyol0)
+- Python 3.6 or higher
+- An OpenAI Account and API key (It's free for personal use).
+   1. Sign up at <https://beta.openai.com/signup> using email or Google/Microsoft account.
+   2. Go to <https://beta.openai.com/account/api-keys> or click on "View API keys" in the menu to get your API key.
+   For a more detailed guide on how to create an OpenAI API key, click [here](https://elephas.app/blog/how-to-create-openai-api-keys-cl5c4f21d281431po7k8fgyol0).
 
 ## Installation
 
@@ -57,8 +59,6 @@ That's it! You're ready to use TerminalGPT!
 Usage: terminalgpt [OPTIONS] COMMAND [ARGS]...
 
 Options:
-  --debug                Prints amounts of tokens used.
-  --token-limit INTEGER  Set the token limit between 1024 and 4096.
   --help                 Show this message and exit.
 
 Commands:
@@ -92,16 +92,6 @@ Delete previous conversations:
 terminalgpt delete
 ```
 
-### Using flags
-
-Using flags, you can set the token limit and debug mode. the flags should be used before the command.
-
-For example:
-
-```sh
-terminalgpt --token-limit 2048 --debug new
-```
-
 ---
 
 ## Future Plans

diff --git a/makefile b/makefile
@@ -19,10 +19,10 @@ publish: test
 	poetry publish
 
 run-new:
-	poetry run terminalgpt --debug new
+	LOG_LEVEL=DEBUG poetry run terminalgpt new
 
 run-load:
-	poetry run terminalgpt --debug load
+	LOG_LEVEL=DEBUG poetry run terminalgpt load
 
 run-delete:
-	poetry run terminalgpt delete
+	LOG_LEVEL=DEBUG poetry run terminalgpt delete
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "terminalgpt"
-version = "1.0.8"
+version = "1.0.9"
 description = "AI chat asistent in your terminal powered by OpenAI GPT-3.5"
 authors = ["Adam Yodinsky <[email protected]>"]
 keywords=["ai", "chat", "terminal", "openai", "gpt3", "chatGPT", "assistant", "gpt3.5", "terminalGPT", "gpt-3.5-turbo"]

diff --git a/terminalgpt/chat_utils.py b/terminalgpt/chat_utils.py
@@ -1,5 +1,6 @@
 """Chat utils module for terminalgpt."""
 
+import os
 import sys
 import time
 
@@ -20,7 +21,6 @@ def chat_loop(
     **kwargs,
 ):
     """Main chat loop."""
-    debug: bool = kwargs["debug"]
     token_limit: int = kwargs["token_limit"]
     session: PromptSession = kwargs["session"]
     messages: list = kwargs["messages"]
@@ -71,7 +71,7 @@ def chat_loop(
         print_utils.print_slowly(Fore.YELLOW + message + Style.RESET_ALL)
 
         # Print usage
-        if debug:
+        if os.environ.get("LOG_LEVEL") == "DEBUG":
             print(
                 Fore.LIGHTBLUE_EX
                 + f"\nAPI Total Usage: {str(total_usage)} tokens"
@@ -114,17 +114,6 @@ def get_user_answer(messages):
                     raise error
 
 
-# pylint: disable=unused-argument
-def validate_token_limit(ctx, param, limit: int):
-    """Validates the token limit."""
-
-    arr = [2**i for i in range(2, 13)]
-
-    if limit not in arr or limit < 1024:
-        raise ValueError("Token limit must be between 1024 and 4096 and a power of 2.")
-    return limit
-
-
 def exceeding_token_limit(total_usage: int, token_limit: int):
     """Returns True if the total_usage is greater than the token limit with some safe buffer."""
 
@@ -134,8 +123,8 @@ def exceeding_token_limit(total_usage: int, token_limit: int):
 def reduce_tokens(messages: list, token_limit: int, total_usage: int):
     """Reduce tokens in messages context."""
 
+    reduce_amount = total_usage - token_limit
     while exceeding_token_limit(total_usage, token_limit):
-        reduce_amount = total_usage - token_limit
         message = messages.pop(1)
         tokenized_message = TIKTOKEN_ENCODER.encode(message["content"])
 
@@ -144,8 +133,30 @@ def reduce_tokens(messages: list, token_limit: int, total_usage: int):
             reduce_amount -= 1
             tokenized_message.pop(0)
 
-    message["content"] = TIKTOKEN_ENCODER.decode(tokenized_message)
-    messages.insert(1, message)
+        if len(tokenized_message) == 0 and exceeding_token_limit(
+            total_usage, token_limit
+        ):
+            # Every message follows <im_start>{role/name}\n{content}<im_end>\n,
+            # so each message that is removed entirely also frees 4 tokens of framing.
+            # The message is now empty, so subtract that overhead as well.
+            reduce_amount -= 4
+            total_usage -= 4
+
+            for key, _ in message.items():
+                if key == "name":  # if there's a name, the role is omitted
+                    # role is always required and always 1 token
+                    reduce_amount += 1
+                    total_usage += 1
+
+    if len(tokenized_message) > 0:
+        message["content"] = TIKTOKEN_ENCODER.decode(tokenized_message)
+        messages.insert(1, message)
+
+    if os.environ.get("LOG_LEVEL") == "DEBUG":
+        counted_tokens = num_tokens_from_messages(messages)
+        print(f"Counted usage: {total_usage}")
+        print(f"Real usage tokens: {counted_tokens}")
+
     return messages, total_usage
 
 
@@ -162,7 +173,7 @@ def num_tokens_from_messages(messages) -> int:
             num_tokens += len(encoding.encode(value))
             if key == "name":  # if there's a name, the role is omitted
                 num_tokens += -1  # role is always required and always 1 token
-    num_tokens += 2  # every reply is primed with <im_start>assistant
+    num_tokens -= 2  # every reply is primed with <im_start>assistant
     return num_tokens
 
 

diff --git a/terminalgpt/config.py b/terminalgpt/config.py
@@ -3,8 +3,30 @@
 import platform
 from os import path
 
+# def shell_version():
+#     """Get the current shell version."""
+
+#     shell = os.environ.get("SHELL")
+#     result = None
+
+#     if platform.system() == "Windows":
+#         result = subprocess.run(["ver"], stdout=subprocess.PIPE, check=True)
+#     else:
+#         result = subprocess.run(
+#             [shell, "--version"], stdout=subprocess.PIPE, check=False
+#         )
+#     return result.stdout.decode("utf-8").strip()
+
+
+def machine_info():
+    """Get the current machine info."""
+
+    return platform.platform()
+
+
 APP_NAME = "terminalgpt"
 API_TOKEN_LIMIT = 4096
+SAFETY_BUFFER = 1024
 
 BASE_PATH = f"~/.{APP_NAME}".replace("~", path.expanduser("~"))
 CONVERSATIONS_PATH = f"{BASE_PATH}/conversations"
@@ -17,19 +39,21 @@
 INIT_SYSTEM_MESSAGE = {
     "role": "system",
     "content": f"""
-You are a helpful personal assistant called "TerminalGPT" for a programer on a {platform.platform()} machine.
-Please note that your answers will be displayed on the terminal.
-So keep them short as possible (5 new lines max) and use a suitable format for printing on terminal.
+- Your name is "TerminalGPT".
+- You are a helpful personal assistant for programers.
+- You are running on {machine_info()} machine.
+- Please note that your answers will be displayed on the terminal.
+- So keep answers short as possible and use a suitable format for printing on a terminal.
 """,
 }
 
 
 INIT_WELCOME_MESSAGE = {
     "role": "system",
     "content": """
-Please start with a random and short greeting message starts with 'Welcome to terminalGPT'.
-Add a ton of self humor.
-Keep it short as possible, one line.
+- Please start the conversation with a random and short greeting message starts with 'Welcome to terminalGPT'.
+- Add a ton of self humor.
+- Keep it short as possible, one line.
 """,
 }
 

diff --git a/terminalgpt/main.py b/terminalgpt/main.py
@@ -16,33 +16,16 @@
 
 
 @click.group()
-@click.option(
-    "--debug",
-    is_flag=True,
-    help="Prints amounts of tokens used.",
-    type=bool,
-    default=False,
-)
-@click.option(
-    "--token-limit",
-    help="Set the token limit between 1024 and 4096.",
-    type=int,
-    default=config.API_TOKEN_LIMIT,
-    callback=chat_utils.validate_token_limit,
-)
 @click.pass_context
-def cli(ctx, debug, token_limit):
+def cli(ctx):
     """*~ TerminalGPT - Your Personal Terminal Assistant ~*"""
 
     ctx.ensure_object(dict)
-
-    ctx.obj["DEBUG"] = debug
-    ctx.obj["TOKEN_LIMIT"] = token_limit
-
     ctx.obj["SESSION"] = PromptSession(
         style=PromptStyle.from_dict({"prompt": "bold"}),
         message="\nUser: ",
     )
+    ctx.obj["TOKEN_LIMIT"] = config.API_TOKEN_LIMIT - config.SAFETY_BUFFER
 
     encryption.check_api_key()
     key = encryption.get_encryption_key(config.KEY_PATH)
@@ -93,10 +76,9 @@ def new(ctx):
         config.INIT_SYSTEM_MESSAGE,
     ]
 
-    chat_utils.welcome_message(messages)
+    chat_utils.welcome_message(messages + [config.INIT_WELCOME_MESSAGE])
 
     chat_utils.chat_loop(
-        debug=ctx.obj["DEBUG"],
         token_limit=ctx.obj["TOKEN_LIMIT"],
         session=ctx.obj["SESSION"],
         messages=messages,
@@ -122,7 +104,7 @@ def load(ctx):
         return
 
     # setup file names auto-completion
-    completer = WordCompleter(conversations)
+    completer = WordCompleter(conversations, ignore_case=True)
     print_utils.print_slowly(print_utils.CONVERSATIONS_INIT_MESSAGE)
 
     # print conversations list
@@ -163,18 +145,19 @@ def load(ctx):
     messages.append(config.INIT_WELCOME_BACK_MESSAGE)
     total_usage = chat_utils.num_tokens_from_messages(messages)
 
-    if chat_utils.exceeding_token_limit(total_usage, config.API_TOKEN_LIMIT):
+    token_limit = ctx.obj["TOKEN_LIMIT"]
+
+    if chat_utils.exceeding_token_limit(total_usage, token_limit):
         messages, total_usage = chat_utils.reduce_tokens(
             messages=messages,
             total_usage=total_usage,
-            token_limit=config.API_TOKEN_LIMIT,
+            token_limit=token_limit,
         )
 
     chat_utils.welcome_message(messages=messages)
 
     chat_utils.chat_loop(
-        debug=ctx.obj["DEBUG"],
-        token_limit=ctx.obj["TOKEN_LIMIT"],
+        token_limit=token_limit,
         session=ctx.obj["SESSION"],
         messages=messages,
         conversation_name=conversation,
@@ -199,7 +182,7 @@ def delete():
         return
 
     # setup file names auto completion
-    completer = WordCompleter(conversations)
+    completer = WordCompleter(conversations, ignore_case=True)
     print_utils.print_slowly(print_utils.CONVERSATIONS_INIT_MESSAGE)
 
     # print conversations list
@@ -229,9 +212,9 @@ def delete():
                 + Style.RESET_ALL
             )
 
-            # update conversations list
-            conversations = conv.get_conversations()
-            completer = WordCompleter(conversations)
+            # delete conversation from conversations list
+            conversations.remove(conversation)
+            completer = WordCompleter(conversations, ignore_case=True)
         else:
             print_utils.print_slowly(
                 Style.BRIGHT

diff --git a/tests/test_chat_utils.py b/tests/test_chat_utils.py
@@ -25,40 +25,12 @@ def test_exceeding_token_limit(self):
         self.assertTrue(chat_utils.exceeding_token_limit(1025, 1024))
         self.assertFalse(chat_utils.exceeding_token_limit(1000, 1023))
 
-    def test_validate_token_limit(self):
-        """Tests validate_token_limit function."""
+    def test_num_tokens_from_messages(self):
+        """Tests num_tokens_from_messages function."""
 
-        self.assertEqual(chat_utils.validate_token_limit(None, None, 1024), 1024)
-        self.assertEqual(chat_utils.validate_token_limit(None, None, 2048), 2048)
-        self.assertEqual(chat_utils.validate_token_limit(None, None, 4096), 4096)
-
-        with self.assertRaises(ValueError):
-            chat_utils.validate_token_limit(None, None, 512)
-
-        with self.assertRaises(ValueError):
-            chat_utils.validate_token_limit(None, None, 8192)
-
-        with self.assertRaises(ValueError):
-            chat_utils.validate_token_limit(None, None, 1023)
-
-    def test_reduce_tokens(self):
-        """Tests reduce_tokens function."""
-
-        token_limit = 30
         messages = self.set_test()
-        total_usage = chat_utils.num_tokens_from_messages(messages)
-        print("total_usage:", total_usage)
-
-        messages, total_usage = chat_utils.reduce_tokens(
-            messages, token_limit, total_usage
-        )
+        self.assertEqual(chat_utils.num_tokens_from_messages(messages), 34)
 
-        self.assertEqual(total_usage, token_limit)
-        self.assertEqual(len(messages), 3)
-        self.assertEqual(messages[0]["role"], "system")
-        self.assertEqual(messages[0]["content"], "Hello user Hello user")
-        self.assertEqual(messages[1]["role"], "assistant")
-        self.assertEqual(messages[1]["content"], "")
 
 if __name__ == "__main__":
     unittest.main()
-Original file line number
+Diff line change
@@ Expand Up / @@ -4,3 +4,4 @@ dist @@
     .vscode
     .idea
     dummy.py
+    ARCH.md