
Commit

update page
yuanmingqi committed Oct 10, 2023
1 parent 705a431 commit 8084fea
Showing 7 changed files with 54 additions and 126 deletions.
Binary file added DB/chroma.sqlite3
45 changes: 43 additions & 2 deletions README.md
@@ -1,2 +1,43 @@
# rllte-copilot
Large language model (LLM)-empowered copilot for RL.
<div align=center>
<br>
<img src='./images/copilot_logo.png' style="width: 75%">
<br>

RLLTE Copilot: LLM-Empowered Assistant for RL
</div>

# Introduction
<img src="./images/arch.png" align="right" width="60%"/>

**Copilot** is the first attempt to integrate an LLM into an RL framework; it aims to help developers reduce the learning cost and facilitate application construction. We follow the design of [LocalGPT](https://github.com/PromtEngineer/localGPT), which interacts privately with documents using the power of GPT. The source documents are first ingested by an instructor embedding model to build a local vector database. A local LLM then interprets questions and generates answers grounded in that database. In practice, we use Vicuna-7B as the base model and build the database from various corpora, including API documentation, tutorials, and RL references. The LLM's strong comprehension enables the copilot to accurately answer questions about how to use the framework as well as general RL questions. Moreover, no additional training is required, and users are free to swap in a different base model according to their available computing power.
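
For illustration, the sketch below shows what such an ingest-and-query pipeline can look like with LangChain and Chroma, following the LocalGPT recipe. The embedding and LLM names mirror the defaults in `src/constants.py`, while the document path and question are hypothetical; the actual implementation lives in `app.py` and may differ.

``` python
# Minimal sketch of a LocalGPT-style pipeline (LangChain 0.0.x API); illustrative, not the project's exact code.
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores import Chroma
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA

# 1. Ingest: embed the source documents into a local Chroma vector database.
docs = TextLoader("SOURCE_DOCUMENTS/rllte_docs.md").load()  # hypothetical source file
embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-large")
db = Chroma.from_documents(docs, embeddings, persist_directory="DB")

# 2. Query: a local LLM answers questions grounded in the retrieved chunks.
llm = HuggingFacePipeline.from_model_id(
    model_id="TheBloke/vicuna-7B-1.1-HF",
    task="text-generation",
)
qa = RetrievalQA.from_chain_type(llm=llm, retriever=db.as_retriever())
print(qa.run("How do I train an agent with RLLTE?"))
```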

# Usage
## Online
We plan to deploy **Copilot** on the Hugging Face platform. We are currently working through computing-resource constraints; the online server is coming soon.

## Offline

First, clone the repository:
``` sh
git clone https://github.com/RLE-Foundation/rllte-copilot.git
```

Then install the necessary dependencies:
``` sh
pip install -r requirements.txt
```

Finally, open a terminal and run `app.py`:
``` sh
python app.py
```

After that, open the local URL printed in the terminal in your browser (typically `http://127.0.0.1:7860` for a Gradio app), and you'll see the following page:

<div align=center>
<br>
<img src='./images/screenshot.png' style="width: 90%">
<br>

RLLTE Copilot: LLM-Empowered Assistant for RL
</div>
Binary file added images/arch.png
Binary file added images/copilot_logo.png
Binary file added images/screenshot.png
2 changes: 1 addition & 1 deletion requirements.txt
@@ -28,4 +28,4 @@ Streamlit-extras

# Excel File Manipulation
openpyxl
gradio
gradio==3.44.0
133 changes: 10 additions & 123 deletions src/constants.py
@@ -1,21 +1,20 @@
import os

# device type
DEVICE_TYPE = "cpu"

# from dotenv import load_dotenv
from chromadb.config import Settings
from langchain.document_loaders import (CSVLoader,
                                         PDFMinerLoader,
                                         TextLoader,
                                         UnstructuredExcelLoader,
                                         Docx2txtLoader)
import os

# https://python.langchain.com/en/latest/modules/indexes/document_loaders/examples/excel.html?highlight=xlsx#microsoft-excel
from langchain.document_loaders import CSVLoader, PDFMinerLoader, TextLoader, UnstructuredExcelLoader, Docx2txtLoader
DEVICE_TYPE = "cuda"

# load_dotenv()
ROOT_DIRECTORY = os.path.dirname(os.path.realpath(__file__))

# Define the folder for storing database
SOURCE_DIRECTORY = f"{ROOT_DIRECTORY}/SOURCE_DOCUMENTS"

PERSIST_DIRECTORY = f"../DB"
PERSIST_DIRECTORY = f"DB"

# Can be changed to a specific number
INGEST_THREADS = os.cpu_count() or 8
@@ -44,120 +43,8 @@
}

# Default Instructor Model
EMBEDDING_MODEL_NAME = "hkunlp/instructor-large" # Uses 1.5 GB of VRAM (High Accuracy with lower VRAM usage)

####
#### OTHER EMBEDDING MODEL OPTIONS
####

# EMBEDDING_MODEL_NAME = "hkunlp/instructor-xl" # Uses 5 GB of VRAM (Most Accurate of all models)
# EMBEDDING_MODEL_NAME = "intfloat/e5-large-v2" # Uses 1.5 GB of VRAM (A little less accurate than instructor-large)
# EMBEDDING_MODEL_NAME = "intfloat/e5-base-v2" # Uses 0.5 GB of VRAM (A good model for lower VRAM GPUs)
# EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2" # Uses 0.2 GB of VRAM (Less accurate but fastest - only requires 150mb of vram)

####
#### MULTILINGUAL EMBEDDING MODELS
####

# EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-large" # Uses 2.5 GB of VRAM
# EMBEDDING_MODEL_NAME = "intfloat/multilingual-e5-base" # Uses 1.2 GB of VRAM


#### SELECT AN OPEN SOURCE LLM (LARGE LANGUAGE MODEL)
# Select the Model ID and model_basename
# load the LLM for generating Natural Language responses

#### GPU VRAM Memory required for LLM Models (ONLY) by Billion Parameter value (B Model)
#### Does not include VRAM used by Embedding Models - which use an additional 2GB-7GB of VRAM depending on the model.
####
#### (B Model)    (float32)    (float16)    (GPTQ 8bit)        (GPTQ 4bit)
####     7b         28 GB        14 GB       7 GB - 9 GB        3.5 GB - 5 GB
####    13b         52 GB        26 GB      13 GB - 15 GB       6.5 GB - 8 GB
####    32b        130 GB        65 GB      32.5 GB - 35 GB    16.25 GB - 19 GB
####    65b        260.8 GB     130.4 GB    65.2 GB - 67 GB     32.6 GB - 35 GB
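#### e.g., a 7B model has ~7e9 parameters: at 4 bytes each (float32) that is ~28 GB, float16 halves it to ~14 GB,
#### and 4-bit GPTQ needs roughly 0.5 bytes per parameter (~3.5 GB) plus quantization overhead (weights only).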

# MODEL_ID = "TheBloke/Llama-2-7B-Chat-GGML"
# MODEL_BASENAME = "llama-2-7b-chat.ggmlv3.q4_0.bin"

####
#### (FOR GGUF MODELS)
####

# MODEL_ID = "TheBloke/Llama-2-13b-Chat-GGUF"
# MODEL_BASENAME = "llama-2-13b-chat.Q4_K_M.gguf"
EMBEDDING_MODEL_NAME = "hkunlp/instructor-large"

# MODEL_ID = "TheBloke/Llama-2-7b-Chat-GGUF"
# MODEL_BASENAME = "llama-2-7b-chat.Q4_K_M.gguf"

# MODEL_ID = "TheBloke/Llama-2-70b-Chat-GGUF"
# MODEL_BASENAME = "llama-2-70b-chat.Q4_K_M.gguf"

####
#### (FOR HF MODELS)
####

# MODEL_ID = "NousResearch/Llama-2-7b-chat-hf"
# MODEL_BASENAME = None
# Source LLM Model
MODEL_ID = "TheBloke/vicuna-7B-1.1-HF"
MODEL_BASENAME = None
# MODEL_ID = "TheBloke/Wizard-Vicuna-7B-Uncensored-HF"
# MODEL_ID = "TheBloke/guanaco-7B-HF"
# MODEL_ID = 'NousResearch/Nous-Hermes-13b' # Requires ~ 23GB VRAM. Using STransformers
# alongside will 100% create OOM on 24GB cards.
# llm = load_model(device_type, model_id=model_id)

####
#### (FOR GPTQ QUANTIZED) Select a llm model based on your GPU and VRAM GB. Does not include Embedding Models VRAM usage.
####

##### 48GB VRAM Graphics Cards (RTX 6000, RTX A6000 and other 48GB VRAM GPUs) #####

### 65b GPTQ LLM Models for 48GB GPUs (*** With best embedding model: hkunlp/instructor-xl ***)
# MODEL_ID = "TheBloke/guanaco-65B-GPTQ"
# MODEL_BASENAME = "model.safetensors"
# MODEL_ID = "TheBloke/Airoboros-65B-GPT4-2.0-GPTQ"
# MODEL_BASENAME = "model.safetensors"
# MODEL_ID = "TheBloke/gpt4-alpaca-lora_mlp-65B-GPTQ"
# MODEL_BASENAME = "model.safetensors"
# MODEL_ID = "TheBloke/Upstage-Llama1-65B-Instruct-GPTQ"
# MODEL_BASENAME = "model.safetensors"

##### 24GB VRAM Graphics Cards (RTX 3090 - RTX 4090 (35% Faster) - RTX A5000 - RTX A5500) #####

### 13b GPTQ Models for 24GB GPUs (*** With best embedding model: hkunlp/instructor-xl ***)
# MODEL_ID = "TheBloke/Wizard-Vicuna-13B-Uncensored-GPTQ"
# MODEL_BASENAME = "Wizard-Vicuna-13B-Uncensored-GPTQ-4bit-128g.compat.no-act-order.safetensors"
# MODEL_ID = "TheBloke/vicuna-13B-v1.5-GPTQ"
# MODEL_BASENAME = "model.safetensors"
# MODEL_ID = "TheBloke/Nous-Hermes-13B-GPTQ"
# MODEL_BASENAME = "nous-hermes-13b-GPTQ-4bit-128g.no-act.order"
# MODEL_ID = "TheBloke/WizardLM-13B-V1.2-GPTQ"
# MODEL_BASENAME = "gptq_model-4bit-128g.safetensors

### 30b GPTQ Models for 24GB GPUs (*** Requires using intfloat/e5-base-v2 instead of hkunlp/instructor-large as embedding model ***)
# MODEL_ID = "TheBloke/Wizard-Vicuna-30B-Uncensored-GPTQ"
# MODEL_BASENAME = "Wizard-Vicuna-30B-Uncensored-GPTQ-4bit--1g.act.order.safetensors"
# MODEL_ID = "TheBloke/WizardLM-30B-Uncensored-GPTQ"
# MODEL_BASENAME = "WizardLM-30B-Uncensored-GPTQ-4bit.act-order.safetensors"

##### 8-10GB VRAM Graphics Cards (RTX 3080 - RTX 3080 Ti - RTX 3070 Ti - 3060 Ti - RTX 2000 Series, Quadro RTX 4000, 5000, 6000) #####
### (*** Requires using intfloat/e5-small-v2 instead of hkunlp/instructor-large as embedding model ***)

### 7b GPTQ Models for 8GB GPUs
# MODEL_ID = "TheBloke/Wizard-Vicuna-7B-Uncensored-GPTQ"
# MODEL_BASENAME = "model.safetensors"
# MODEL_ID = "TheBloke/WizardLM-7B-uncensored-GPTQ"
# MODEL_BASENAME = "WizardLM-7B-uncensored-GPTQ-4bit-128g.compat.no-act-order.safetensors"
# MODEL_ID = "TheBloke/wizardLM-7B-GPTQ"
# MODEL_BASENAME = "model.safetensors"

####
#### (FOR GGML) (Quantized cpu+gpu+mps) models - check if they support llama.cpp
####

# MODEL_ID = "TheBloke/wizard-vicuna-13B-GGML"
# MODEL_BASENAME = "wizard-vicuna-13B.ggmlv3.q4_0.bin"
# MODEL_BASENAME = "wizard-vicuna-13B.ggmlv3.q6_K.bin"
# MODEL_BASENAME = "wizard-vicuna-13B.ggmlv3.q2_K.bin"
# MODEL_ID = "TheBloke/orca_mini_3B-GGML"
# MODEL_BASENAME = "orca-mini-3b.ggmlv3.q4_0.bin"
