Skip to content

Commit cca4a93

Browse files
committed
fix for chat templates and drafting
1 parent 03def28 commit cca4a93

File tree

3 files changed

+27
-11
lines changed

3 files changed

+27
-11
lines changed

gpttype_adapter.cpp

+12-4
Original file line numberDiff line numberDiff line change
@@ -601,10 +601,18 @@ static void speculative_decoding_setup(std::string spec_model_filename, const ll
601601
}
602602
else
603603
{
604-
printf("Error: Draft model vocab of (%d) does not match base vocab of (%d). Speculative decoding cannot be used!\n",draftvocab,base_n_vocab);
605-
printf("If you REALLY want to override this, run in --debugmode and this restriction will be disabled. However, you might encounter unwanted results!\n");
606-
llama_free(draft_ctx);
607-
draft_ctx = nullptr;
604+
int diff = abs(draftvocab-base_n_vocab);
605+
if(diff <= 256)
606+
{
607+
//allow small differences to work
608+
printf("WARNING: Draft model vocab of (%d) does not match base vocab of (%d).\nSpeculative decoding may malfunction!\n",draftvocab,base_n_vocab);
609+
} else {
610+
printf("Error: Draft model vocab of (%d) is too different from base vocab of (%d). Speculative decoding cannot be used!\n",draftvocab,base_n_vocab);
611+
printf("If you REALLY want to override this, run in --debugmode and this restriction will be disabled. However, you might encounter unwanted results!\n");
612+
llama_free(draft_ctx);
613+
draft_ctx = nullptr;
614+
}
615+
608616
}
609617
}
610618
}

kcpp_adapters/DeepSeek-V2.json

+8
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
{
2+
"system_start": "",
3+
"system_end": "",
4+
"user_start": "<|User|>",
5+
"user_end": "",
6+
"assistant_start": "<|Assistant|>",
7+
"assistant_end": "<|end▁of▁sentence|>"
8+
}

koboldcpp.py

+7-7
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959
modelbusy = threading.Lock()
6060
requestsinqueue = 0
6161
defaultport = 5001
62-
KcppVersion = "1.82.3"
62+
KcppVersion = "1.82.4"
6363
showdebug = True
6464
guimode = False
6565
showsamplerwarning = True
@@ -3421,7 +3421,7 @@ def auto_set_backend_gui(manual_select=False):
34213421
def on_picked_model_file(filepath):
34223422
if filepath.lower().endswith('.kcpps') or filepath.lower().endswith('.kcppt'):
34233423
#load it as a config file instead
3424-
with open(filepath, 'r') as f:
3424+
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
34253425
dict = json.load(f)
34263426
import_vars(dict)
34273427

@@ -4014,7 +4014,7 @@ def export_vars():
40144014
try:
40154015
if kcpp_exporting_template and isinstance(args.chatcompletionsadapter, str) and args.chatcompletionsadapter!="" and os.path.exists(args.chatcompletionsadapter):
40164016
print("Embedding chat completions adapter...") # parse and save embedded preload story
4017-
with open(args.chatcompletionsadapter, 'r') as f:
4017+
with open(args.chatcompletionsadapter, 'r', encoding='utf-8', errors='ignore') as f:
40184018
args.chatcompletionsadapter = json.load(f)
40194019
except Exception:
40204020
pass
@@ -4025,7 +4025,7 @@ def export_vars():
40254025
try:
40264026
if kcpp_exporting_template and isinstance(args.preloadstory, str) and args.preloadstory!="" and os.path.exists(args.preloadstory):
40274027
print("Embedding preload story...") # parse and save embedded preload story
4028-
with open(args.preloadstory, 'r') as f:
4028+
with open(args.preloadstory, 'r', encoding='utf-8', errors='ignore') as f:
40294029
args.preloadstory = json.load(f)
40304030
except Exception:
40314031
pass
@@ -4283,7 +4283,7 @@ def load_config_gui(): #this is used to populate the GUI with a config file, whe
42834283
if not filename or filename=="":
42844284
return
42854285
runmode_untouched = False
4286-
with open(filename, 'r') as f:
4286+
with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
42874287
dict = json.load(f)
42884288
import_vars(dict)
42894289
pass
@@ -4761,7 +4761,7 @@ def unload_libs():
47614761

47624762
def load_config_cli(filename):
47634763
print("Loading .kcpps configuration file...")
4764-
with open(filename, 'r') as f:
4764+
with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
47654765
config = json.load(f)
47664766
args.istemplate = False
47674767
raw_args = (sys.argv[1:]) #a lousy hack to allow for overriding kcpps
@@ -4990,7 +4990,7 @@ def main(launch_args,start_server=True):
49904990
ccadapter_path = os.path.abspath(premade_adapt_path)
49914991
if ccadapter_path:
49924992
print(f"Loading Chat Completions Adapter: {ccadapter_path}")
4993-
with open(ccadapter_path, 'r') as f:
4993+
with open(ccadapter_path, 'r', encoding='utf-8', errors='replace') as f:
49944994
chatcompl_adapter = json.load(f)
49954995
canload = True
49964996
else:

0 commit comments

Comments
 (0)