From d9668d279c38c25a57ac5a1ce516cda2a885025c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=91=A8=E5=85=AC=E4=B8=8D=E8=A7=A3=E6=A2=A6?= Date: Fri, 2 Feb 2024 10:41:31 +0800 Subject: [PATCH 1/7] Start bat scripts --- start_GUI.bat | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 start_GUI.bat diff --git a/start_GUI.bat b/start_GUI.bat new file mode 100644 index 00000000..51fd018e --- /dev/null +++ b/start_GUI.bat @@ -0,0 +1,8 @@ +@echo off + +set HF_HOME=huggingface + +call venv\Scripts\activate +python gradio_demo.py + +pause From ff05903b02f638b548ccb3ef09924a405391f568 Mon Sep 17 00:00:00 2001 From: SleeeepyZhou Date: Fri, 2 Feb 2024 11:09:06 +0800 Subject: [PATCH 2/7] Update ignore --- .gitignore | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 0e5ac793..3768286e 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ .venv -__pycache__ \ No newline at end of file +__pycache__ +/huggingface +/.vs From 3b690dc8697977fc7e748f50f4d27169d2268673 Mon Sep 17 00:00:00 2001 From: SleeeepyZhou Date: Tue, 6 Feb 2024 16:40:55 +0800 Subject: [PATCH 3/7] update api --- API.py | 358 +++++++++++++++++++++++++++++++++++++++++ moondream/moondream.py | 3 +- 2 files changed, 360 insertions(+), 1 deletion(-) create mode 100644 API.py diff --git a/API.py b/API.py new file mode 100644 index 00000000..b51bfce2 --- /dev/null +++ b/API.py @@ -0,0 +1,358 @@ +import time +from matplotlib.pyplot import hist +import uvicorn +import argparse + +import torch +from transformers import AutoModelForCausalLM, LlamaTokenizer, PreTrainedModel, PreTrainedTokenizer, \ + TextIteratorStreamer, CodeGenTokenizerFast as Tokenizer +from sse_starlette.sse import EventSourceResponse + +from loguru import logger +from typing import List, Literal, Union, Tuple, Optional + +from fastapi import FastAPI, HTTPException +from fastapi.middleware.cors import CORSMiddleware + +from pydantic import BaseModel, Field + + +# 请求 +class TextContent(BaseModel): + type: Literal["text"] + text: str +class ImageUrl(BaseModel): + url: str +class ImageUrlContent(BaseModel): + type: Literal["image_url"] + image_url: ImageUrl +ContentItem = Union[TextContent, ImageUrlContent] +class ChatMessageInput(BaseModel): + role: Literal["user", "assistant", "system"] + content: Union[str, List[ContentItem]] + name: Optional[str] = None +class ChatCompletionRequest(BaseModel): + model: str + messages: List[ChatMessageInput] + temperature: Optional[float] = 0.8 + top_p: Optional[float] = 0.8 + max_tokens: Optional[int] = None + stream: Optional[bool] = False + # Additional parameters + repetition_penalty: Optional[float] = 1.0 + +# 响应 +class ChatMessageResponse(BaseModel): + role: Literal["assistant"] + content: str = None + name: Optional[str] = None +class ChatCompletionResponseChoice(BaseModel): + index: int + message: ChatMessageResponse +class DeltaMessage(BaseModel): + role: Optional[Literal["user", "assistant", "system"]] = None + content: Optional[str] = None +class ChatCompletionResponseStreamChoice(BaseModel): + index: int + delta: DeltaMessage +class UsageInfo(BaseModel): + prompt_tokens: int = 0 + total_tokens: int = 0 + completion_tokens: Optional[int] = 0 +class ChatCompletionResponse(BaseModel): + model: str + object: Literal["chat.completion", "chat.completion.chunk"] + choices: List[Union[ChatCompletionResponseChoice, ChatCompletionResponseStreamChoice]] + created: Optional[int] = Field(default_factory=lambda: int(time.time())) + usage: Optional[UsageInfo] = None + + +import requests +import base64 +from PIL import Image +from io import BytesIO +import re +from threading import Thread + +from moondream import Moondream, detect_device +from contextlib import asynccontextmanager + +# 图片输入处理 +def process_img(input_data): + if isinstance(input_data, str): + # URL + if input_data.startswith("http://") or input_data.startswith("https://"): + response = requests.get(input_data) + image_data = response.content + pil_image = Image.open(BytesIO(image_data)).convert('RGB') + # base64 + elif input_data.startswith("data:image/"): + base64_data = input_data.split(",")[1] + image_data = base64.b64decode(base64_data) + pil_image = Image.open(BytesIO(image_data)).convert('RGB') + # img_path + else: + pil_image = Image.open(input_data) + # PIL + elif isinstance(input_data, Image.Image): + pil_image = input_data + else: + raise ValueError("data type error") + + return pil_image + +# 历史消息处理 +def process_history_and_images(messages: List[ChatMessageInput]) -> Tuple[ + Optional[str], Optional[str], Optional[List[Image.Image]]]: + + def chat_history_to_prompt(history): + prompt = "" + for i, (old_query, response) in enumerate(history): + prompt += f"Question: {old_query}\n\nAnswer: {response}\n\n" + return prompt + + last_user_texts = '' + formatted_history = [] + image_list = [] + + for i, message in enumerate(messages): + role = message.role + content = message.content + + if isinstance(content, list): # text + text_content = ' '.join(item.text for item in content if isinstance(item, TextContent)) + else: + text_content = content + + if isinstance(content, list): # image + for item in content: + if isinstance(item, ImageUrlContent): + image_url = item.image_url.url + image = process_img(image_url) + image_list.append(image) + + if role == 'user': + if i == len(messages) - 1: # last message + last_user_texts = text_content + else: + formatted_history.append((text_content, '')) + elif role == 'assistant': + if formatted_history: + if formatted_history[-1][1] != '': + assert False, f"the last texts is answered. answer again. {formatted_history[-1][0]}, {formatted_history[-1][1]}, {text_content}" + formatted_history[-1] = (formatted_history[-1][0], text_content) + else: + assert False, f"assistant reply before user" + else: + assert False, f"unrecognized role: {role}" + + history = chat_history_to_prompt(formatted_history) + + return last_user_texts, history, image_list + + +@torch.inference_mode() +# Moondrean推理 +def generate_stream_moondream(params: dict): + global model, tokenizer + + # 输入处理 + messages = params["messages"] + + temperature = float(params.get("temperature", 1.0)) + repetition_penalty = float(params.get("repetition_penalty", 1.0)) + top_p = float(params.get("top_p", 1.0)) + max_new_tokens = int(params.get("max_tokens", 128)) + + prompt, history, image_list = process_history_and_images(messages) + # 只处理最后一张图 + img = image_list[-1] + + # 构建输入 + ''' + answer_question( + image_embeds, + question, + tokenizer, + max_new_tokens, + chat_history="", + result_queue=None, + **kwargs, + ) + ''' + image_embeds = model.encode_image(img) + streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True) + gen_kwargs = { + "image_embeds": image_embeds, + "question": prompt, + "tokenizer": tokenizer, + "max_new_tokens": max_new_tokens, + "chat_history":history, + "repetition_penalty": repetition_penalty, + "do_sample": False, + "top_p": top_p, + "streamer": streamer, + } + if temperature > 1e-5: + gen_kwargs["temperature"] = temperature + + thread = Thread( + target=model.answer_question, + kwargs=gen_kwargs, + ) + + input_ids = [tokenizer.bos_token_id] + input_echo_len = len(torch.tensor(input_ids, dtype=torch.long)[0]) + total_len = 0 + # 启动推理 + thread.start() + buffer = "" + for new_text in streamer: + clean_text = re.sub("<$|END$", "", new_text) + buffer += clean_text + yield { + "text": buffer, + # 返回生成的文本 + "usage": { + "prompt_tokens": input_echo_len, + "completion_tokens": total_len - input_echo_len, + "total_tokens": total_len, + }, + } + generated_ret ={ + "text": buffer.strip("= 8: + torch_type = torch.bfloat16 + else: + torch_type = torch.float16 + + print("========Use torch type as:{} with device:{}========\n\n".format(torch_type, DEVICE)) + + load_mod(MODEL_PATH) + + uvicorn.run(app, host='0.0.0.0', port=8000, workers=1) diff --git a/moondream/moondream.py b/moondream/moondream.py index 9a223d4e..80c213e8 100644 --- a/moondream/moondream.py +++ b/moondream/moondream.py @@ -78,6 +78,7 @@ def answer_question( image_embeds, question, tokenizer, + max_new_tokens, chat_history="", result_queue=None, **kwargs, @@ -88,7 +89,7 @@ def answer_question( prompt, eos_text="", tokenizer=tokenizer, - max_new_tokens=128, + max_new_tokens=max_new_tokens, **kwargs, )[0] cleaned_answer = re.sub("<$", "", re.sub("END$", "", answer)).strip() From fa8d1354ef7a9e5b6344d7ab210d6cb9ed43208f Mon Sep 17 00:00:00 2001 From: SleeeepyZhou Date: Tue, 6 Feb 2024 16:43:43 +0800 Subject: [PATCH 4/7] update api --- API.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/API.py b/API.py index b51bfce2..81c13846 100644 --- a/API.py +++ b/API.py @@ -201,8 +201,7 @@ def generate_stream_moondream(params: dict): kwargs=gen_kwargs, ) - input_ids = [tokenizer.bos_token_id] - input_echo_len = len(torch.tensor(input_ids, dtype=torch.long)[0]) + input_echo_len = 0 total_len = 0 # 启动推理 thread.start() @@ -211,8 +210,7 @@ def generate_stream_moondream(params: dict): clean_text = re.sub("<$|END$", "", new_text) buffer += clean_text yield { - "text": buffer, - # 返回生成的文本 + "text": buffer.strip(" Date: Tue, 6 Feb 2024 17:15:20 +0800 Subject: [PATCH 5/7] Delete start_GUI.bat --- start_GUI.bat | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 start_GUI.bat diff --git a/start_GUI.bat b/start_GUI.bat deleted file mode 100644 index 51fd018e..00000000 --- a/start_GUI.bat +++ /dev/null @@ -1,8 +0,0 @@ -@echo off - -set HF_HOME=huggingface - -call venv\Scripts\activate -python gradio_demo.py - -pause From 1445cbd87cbeaa603d7dfb39192672e1cca53501 Mon Sep 17 00:00:00 2001 From: SleeeepyZhou Date: Tue, 6 Feb 2024 17:20:33 +0800 Subject: [PATCH 6/7] update api script --- API.py => openai_api_demo.py | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) rename API.py => openai_api_demo.py (98%) diff --git a/API.py b/openai_api_demo.py similarity index 98% rename from API.py rename to openai_api_demo.py index 81c13846..91e18024 100644 --- a/API.py +++ b/openai_api_demo.py @@ -4,8 +4,7 @@ import argparse import torch -from transformers import AutoModelForCausalLM, LlamaTokenizer, PreTrainedModel, PreTrainedTokenizer, \ - TextIteratorStreamer, CodeGenTokenizerFast as Tokenizer +from transformers import TextIteratorStreamer, CodeGenTokenizerFast as Tokenizer from sse_starlette.sse import EventSourceResponse from loguru import logger @@ -16,6 +15,15 @@ from pydantic import BaseModel, Field +import requests +import base64 +from PIL import Image +from io import BytesIO +import re +from threading import Thread + +from moondream import Moondream, detect_device +from contextlib import asynccontextmanager # 请求 class TextContent(BaseModel): @@ -66,17 +74,6 @@ class ChatCompletionResponse(BaseModel): created: Optional[int] = Field(default_factory=lambda: int(time.time())) usage: Optional[UsageInfo] = None - -import requests -import base64 -from PIL import Image -from io import BytesIO -import re -from threading import Thread - -from moondream import Moondream, detect_device -from contextlib import asynccontextmanager - # 图片输入处理 def process_img(input_data): if isinstance(input_data, str): From a23bee6b699d2c9c7ff96500e65297e1f61420d9 Mon Sep 17 00:00:00 2001 From: SleeeepyZhou Date: Tue, 6 Feb 2024 17:49:03 +0800 Subject: [PATCH 7/7] update api require --- openai_api_demo.py | 1 - openapi_requirements.txt | 6 ++++++ 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 openapi_requirements.txt diff --git a/openai_api_demo.py b/openai_api_demo.py index 91e18024..54cd358e 100644 --- a/openai_api_demo.py +++ b/openai_api_demo.py @@ -1,5 +1,4 @@ import time -from matplotlib.pyplot import hist import uvicorn import argparse diff --git a/openapi_requirements.txt b/openapi_requirements.txt new file mode 100644 index 00000000..32da27aa --- /dev/null +++ b/openapi_requirements.txt @@ -0,0 +1,6 @@ +sse-starlette>=1.8.2 +fastapi>=0.105.0 +loguru~=0.7.2 +uvicorn~=0.24.0 +requests +pydantic>=2.5.2 \ No newline at end of file