Commit c1ae815

fix(misc): Format
1 parent b77e507 commit c1ae815

35 files changed: +2270 -1332 lines changed
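
Note: every hunk below is consistent with Black's default style (double-quoted strings, trailing commas in exploded calls, argument lists wrapped at 88 columns, two blank lines before top-level defs). The commit message does not name the tool, so treat Black as an assumption. A minimal sketch of reproducing one of the changes below programmatically:

    import black  # assumption: the formatter behind this commit is not stated

    src = "parser.add_argument(\"-i\", \"--spm-infill\", action='store_true')\n"
    formatted = black.format_str(src, mode=black.FileMode())
    print(formatted)  # parser.add_argument("-i", "--spm-infill", action="store_true")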

examples/batch-processing/server.py

+1

@@ -25,6 +25,7 @@
 
 import openai.types.chat as types
 
+
 @app.post("/v1/chat/completions")
 def create_chat_completions():
     return {"message": "Hello World"}

examples/gradio_chat/local.py

+15 -7

@@ -6,25 +6,26 @@
 llama = llama_cpp.Llama.from_pretrained(
     repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
     filename="*q8_0.gguf",
-    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
-    verbose=False
+    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
+        "Qwen/Qwen1.5-0.5B"
+    ),
+    verbose=False,
 )
 
 model = "gpt-3.5-turbo"
 
+
 def predict(message, history):
     messages = []
 
     for user_message, assistant_message in history:
         messages.append({"role": "user", "content": user_message})
         messages.append({"role": "assistant", "content": assistant_message})
-
+
     messages.append({"role": "user", "content": message})
 
     response = llama.create_chat_completion_openai_v1(
-        model=model,
-        messages=messages,
-        stream=True
+        model=model, messages=messages, stream=True
     )
 
     text = ""
@@ -52,7 +53,14 @@ def predict(message, history):
 """
 
 with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo:
-    gr.ChatInterface(predict, fill_height=True, examples=["What is the capital of France?", "Who was the first person on the moon?"])
+    gr.ChatInterface(
+        predict,
+        fill_height=True,
+        examples=[
+            "What is the capital of France?",
+            "Who was the first person on the moon?",
+        ],
+    )
 
 
 if __name__ == "__main__":
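
The first hunk cuts off at text = "". For context, a sketch of how the rest of predict typically consumes the stream created by the reformatted call (the chunk attribute access assumes the OpenAI-style ChatCompletionChunk objects that the *_openai_v1 variant returns; the loop body itself is outside this hunk):

    # continuation of predict(): `response` is the stream created above
    text = ""
    for chunk in response:
        content = chunk.choices[0].delta.content
        if content:
            text += content
            yield text  # Gradio's ChatInterface renders the growing partial reply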

examples/gradio_chat/server.py

+12 -9

@@ -2,26 +2,22 @@
 
 from openai import OpenAI
 
-client = OpenAI(
-    base_url="http://localhost:8000/v1",
-    api_key="llama.cpp"
-)
+client = OpenAI(base_url="http://localhost:8000/v1", api_key="llama.cpp")
 
 model = "gpt-3.5-turbo"
 
+
 def predict(message, history):
     messages = []
 
     for user_message, assistant_message in history:
         messages.append({"role": "user", "content": user_message})
         messages.append({"role": "assistant", "content": assistant_message})
-
+
     messages.append({"role": "user", "content": message})
 
     response = client.chat.completions.create(
-        model=model,
-        messages=messages,
-        stream=True
+        model=model, messages=messages, stream=True
     )
 
     text = ""
@@ -49,7 +45,14 @@ def predict(message, history):
 """
 
 with gr.Blocks(theme=gr.themes.Soft(), js=js, css=css, fill_height=True) as demo:
-    gr.ChatInterface(predict, fill_height=True, examples=["What is the capital of France?", "Who was the first person on the moon?"])
+    gr.ChatInterface(
+        predict,
+        fill_height=True,
+        examples=[
+            "What is the capital of France?",
+            "Who was the first person on the moon?",
+        ],
+    )
 
 
 if __name__ == "__main__":
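
This client targets a local OpenAI-compatible endpoint (base_url="http://localhost:8000/v1" with the dummy key "llama.cpp"). A hedged sketch of a one-off, non-streaming request against the same client and model, handy for checking the server is reachable before wiring it into the Gradio UI (the prompt string is illustrative, not part of the example):

    # quick connectivity check using the `client` and `model` defined above
    reply = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": "Say hello in one word."}],
    )
    print(reply.choices[0].message.content)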

examples/hf_pull/main.py

+9 -12

@@ -5,29 +5,26 @@
 llama = llama_cpp.Llama.from_pretrained(
     repo_id="Qwen/Qwen1.5-0.5B-Chat-GGUF",
     filename="*q8_0.gguf",
-    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B"),
-    verbose=False
+    tokenizer=llama_cpp.llama_tokenizer.LlamaHFTokenizer.from_pretrained(
+        "Qwen/Qwen1.5-0.5B"
+    ),
+    verbose=False,
 )
 
 response = llama.create_chat_completion(
-    messages=[
-        {
-            "role": "user",
-            "content": "What is the capital of France?"
-        }
-    ],
+    messages=[{"role": "user", "content": "What is the capital of France?"}],
     response_format={
         "type": "json_object",
         "schema": {
             "type": "object",
             "properties": {
                 "country": {"type": "string"},
-                "capital": {"type": "string"}
+                "capital": {"type": "string"},
             },
             "required": ["country", "capital"],
-        }
+        },
     },
-    stream=True
+    stream=True,
 )
 
 for chunk in response:
@@ -36,4 +33,4 @@
         continue
     print(delta["content"], end="", flush=True)
 
-print()
+print()
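
Because the request constrains output with a JSON-object response_format (a schema with country and capital), the streamed deltas concatenate into one JSON document. A sketch of collecting and parsing it instead of printing each piece (a stream can only be iterated once, so this replaces the print loop; the chunk/delta shape matches the loop shown above):

    import json

    parts = []
    for chunk in response:
        delta = chunk["choices"][0]["delta"]
        if "content" in delta:
            parts.append(delta["content"])

    answer = json.loads("".join(parts))  # e.g. {"country": "France", "capital": "Paris"}
    print(answer["capital"])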

examples/high_level_api/fastapi_server.py

+1

@@ -24,6 +24,7 @@
 To actually see the implementation of the server, see llama_cpp/server/app.py
 
 """
+
 import os
 import uvicorn
 

examples/high_level_api/high_level_api_infill.py

+12 -8

@@ -6,16 +6,16 @@
 parser.add_argument("-m", "--model", type=str, default="../models/7B/ggml-models.bin")
 parser.add_argument("-p", "--prompt", type=str, default="def add(")
 parser.add_argument("-s", "--suffix", type=str, default="\n return sum\n\n")
-parser.add_argument("-i", "--spm-infill", action='store_true')
+parser.add_argument("-i", "--spm-infill", action="store_true")
 args = parser.parse_args()
 
 llm = Llama(model_path=args.model, n_gpu_layers=-1, spm_infill=args.spm_infill)
 
 output = llm.create_completion(
-    temperature = 0.0,
-    repeat_penalty = 1.0,
-    prompt = args.prompt,
-    suffix = args.suffix,
+    temperature=0.0,
+    repeat_penalty=1.0,
+    prompt=args.prompt,
+    suffix=args.suffix,
 )
 
 # Models sometimes repeat suffix in response, attempt to filter that
@@ -25,9 +25,13 @@
 unwanted_response_length = len(unwanted_response_suffix)
 
 filtered = False
-if unwanted_response_suffix and response_stripped[-unwanted_response_length:] == unwanted_response_suffix:
+if (
+    unwanted_response_suffix
+    and response_stripped[-unwanted_response_length:] == unwanted_response_suffix
+):
     response = response_stripped[:-unwanted_response_length]
     filtered = True
 
-print(f"Fill-in-Middle completion{' (filtered)' if filtered else ''}:\n\n{args.prompt}\033[32m{response}\033[{'33' if filtered else '0'}m{args.suffix}\033[0m")
-
+print(
+    f"Fill-in-Middle completion{' (filtered)' if filtered else ''}:\n\n{args.prompt}\033[32m{response}\033[{'33' if filtered else '0'}m{args.suffix}\033[0m"
+)
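
The second hunk uses response, response_stripped, and unwanted_response_suffix without showing their definitions, which sit between the two hunks. A hedged reconstruction of that glue code, assuming create_completion returns the usual OpenAI-style completion dict (these exact lines are not shown in this diff):

    # assumed lines between the two hunks above
    response = output["choices"][0]["text"]
    response_stripped = response.rstrip()
    unwanted_response_suffix = args.suffix.rstrip()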

examples/low_level_api/Chat.py

+28 -24

@@ -3,10 +3,12 @@
 from common import GptParams
 from low_level_api_chat_cpp import LLaMAInteract
 
+
 def env_or_def(env, default):
-    if (env in os.environ):
-        return os.environ[env]
-    return default
+    if env in os.environ:
+        return os.environ[env]
+    return default
+
 
 AI_NAME = env_or_def("AI_NAME", "ChatLLaMa")
 MODEL = env_or_def("MODEL", "./models/llama-13B/ggml-model.bin")
@@ -15,10 +17,10 @@ def env_or_def(env, default):
 N_THREAD = int(env_or_def("N_THREAD", "8"))
 
 today = datetime.datetime.today()
-DATE_YEAR=today.strftime("%Y")
-DATE_TIME=today.strftime("%H:%M")
+DATE_YEAR = today.strftime("%Y")
+DATE_TIME = today.strftime("%H:%M")
 
-prompt=f"""Text transcript of a never ending dialog, where {USER_NAME} interacts with an AI assistant named {AI_NAME}.
+prompt = f"""Text transcript of a never ending dialog, where {USER_NAME} interacts with an AI assistant named {AI_NAME}.
 {AI_NAME} is helpful, kind, honest, friendly, good at writing and never fails to answer {USER_NAME}'s requests immediately and with details and precision.
 There are no annotations like (30 seconds passed...) or (to himself), just what {USER_NAME} and {AI_NAME} say aloud to each other.
 The dialog lasts for years, the entirety of it is shared below. It's 10000 pages long.
@@ -45,27 +47,29 @@ def env_or_def(env, default):
 {AI_NAME}: Blue.
 {USER_NAME}: What time is it?
 {AI_NAME}: It is {DATE_TIME}.
-{USER_NAME}:""" + " ".join(sys.argv[1:])
+{USER_NAME}:""" + " ".join(
+    sys.argv[1:]
+)
 
 print("Loading model...")
 params = GptParams(
-    n_ctx=2048,
-    temp=0.7,
-    top_k=40,
-    top_p=0.5,
-    repeat_last_n=256,
-    n_batch=1024,
-    repeat_penalty=1.17647,
-    model=MODEL,
-    n_threads=N_THREAD,
-    n_predict=N_PREDICTS,
-    use_color=True,
-    interactive=True,
-    antiprompt=[f"{USER_NAME}:"],
-    input_prefix=" ",
-    input_suffix=f"{AI_NAME}:",
-    prompt=prompt,
+    n_ctx=2048,
+    temp=0.7,
+    top_k=40,
+    top_p=0.5,
+    repeat_last_n=256,
+    n_batch=1024,
+    repeat_penalty=1.17647,
+    model=MODEL,
+    n_threads=N_THREAD,
+    n_predict=N_PREDICTS,
+    use_color=True,
+    interactive=True,
+    antiprompt=[f"{USER_NAME}:"],
+    input_prefix=" ",
+    input_suffix=f"{AI_NAME}:",
+    prompt=prompt,
 )
 
 with LLaMAInteract(params) as m:
-    m.interact()
+    m.interact()
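
This hunk rewrites env_or_def's condition without the redundant parentheses. The helper behaves like a plain environment lookup with a default; a tiny self-contained sketch of the equivalence (illustrative only):

    import os

    def env_or_def(env, default):
        if env in os.environ:
            return os.environ[env]
        return default

    # same result as the stdlib one-liner
    assert env_or_def("AI_NAME", "ChatLLaMa") == os.environ.get("AI_NAME", "ChatLLaMa")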

examples/low_level_api/Miku.py

+25 -21

@@ -3,18 +3,20 @@
 from common import GptParams
 from low_level_api_chat_cpp import LLaMAInteract
 
+
 def env_or_def(env, default):
-    if (env in os.environ):
-        return os.environ[env]
-    return default
+    if env in os.environ:
+        return os.environ[env]
+    return default
+
 
 AI_NAME = env_or_def("AI_NAME", "Miku")
 MODEL = env_or_def("MODEL", "./models/llama-13B/ggml-model.bin")
 USER_NAME = env_or_def("USER_NAME", "Anon")
 N_PREDICTS = int(env_or_def("N_PREDICTS", "4096"))
 N_THREAD = int(env_or_def("N_THREAD", "0"))
 
-prompt=f"""This is a transcript of a 1000 page, never ending conversation between {USER_NAME} and the cute and helpful AI assistant {AI_NAME}. {AI_NAME} is a girl who is an AI running on the users computer.
+prompt = f"""This is a transcript of a 1000 page, never ending conversation between {USER_NAME} and the cute and helpful AI assistant {AI_NAME}. {AI_NAME} is a girl who is an AI running on the users computer.
 {AI_NAME} can think for herself without the user seeing her thoughts by adding a /think prefix to her output. She uses this to reason about the world and to think about what she should say next.
 {AI_NAME} is always coherent and makes sense, but if she isn't sure if what she is saying is correct she will ask the user for help.
 {AI_NAME} is a very helpful AI and will help the user with anything they need, she is also very friendly and will try to make the user feel better if they are sad.
@@ -32,28 +34,30 @@ def env_or_def(env, default):
 {AI_NAME}: /think It sounds like {USER_NAME} is happy to have me as their assistant! I'm so happy too! ^_^ Glad that whole emotion thing didn't scare him off!
 {AI_NAME}: /think I wonder what {USER_NAME} likes to do in his free time? I should ask him about that!
 {AI_NAME}: What do you like to do in your free time? ^_^
-{USER_NAME}:""" + " ".join(sys.argv[1:])
+{USER_NAME}:""" + " ".join(
+    sys.argv[1:]
+)
 
 print("Loading model...")
 params = GptParams(
-    n_batch=1024,
-    n_ctx=2048,
-    n_keep=-1,
-    repeat_last_n=256,
-    repeat_penalty=1.17647,
-    temp=0.7,
-    top_k=40,
-    top_p=0.5,
-    model=MODEL,
-    n_predict=N_PREDICTS,
-    use_color=True,
-    interactive=True,
-    antiprompt=[f"{USER_NAME}:"],
-    prompt=prompt,
+    n_batch=1024,
+    n_ctx=2048,
+    n_keep=-1,
+    repeat_last_n=256,
+    repeat_penalty=1.17647,
+    temp=0.7,
+    top_k=40,
+    top_p=0.5,
+    model=MODEL,
+    n_predict=N_PREDICTS,
+    use_color=True,
+    interactive=True,
+    antiprompt=[f"{USER_NAME}:"],
+    prompt=prompt,
 )
 
 if N_THREAD > 0:
-    params.n_threads = N_THREAD
+    params.n_threads = N_THREAD
 
 with LLaMAInteract(params) as m:
-    m.interact()
+    m.interact()
