Skip to content

Commit 14dec0c

Browse files
CISC and ngxson authored
main: use jinja chat template system prompt by default (#12118)
* Use jinja chat template system prompt by default
* faster conditional order
* remove nested ternary

Co-authored-by: Xuan Son Nguyen <[email protected]>
1 parent 1782cdf commit 14dec0c

File tree

1 file changed

+20
-11
lines changed

1 file changed

+20
-11
lines changed

examples/main/main.cpp

+20-11
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,6 @@
3131
#pragma warning(disable: 4244 4267) // possible loss of data
3232
#endif
3333

34-
static const char * DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant";
35-
3634
static llama_context ** g_ctx;
3735
static llama_model ** g_model;
3836
static common_sampler ** g_smpl;
@@ -267,6 +265,7 @@ int main(int argc, char ** argv) {
267265

268266
std::vector<llama_token> embd_inp;
269267

268+
bool waiting_for_first_input = params.conversation_mode && params.enable_chat_template && params.system_prompt.empty();
270269
auto chat_add_and_format = [&chat_msgs, &chat_templates](const std::string & role, const std::string & content) {
271270
common_chat_msg new_msg;
272271
new_msg.role = role;
@@ -278,11 +277,20 @@ int main(int argc, char ** argv) {
278277
};
279278

280279
{
281-
auto prompt = (params.conversation_mode && params.enable_chat_template)
282-
// format the system prompt in conversation mode (fallback to default if empty)
283-
? chat_add_and_format("system", params.system_prompt.empty() ? DEFAULT_SYSTEM_MESSAGE : params.system_prompt)
280+
std::string prompt;
281+
282+
if (params.conversation_mode && params.enable_chat_template) {
283+
// format the system prompt in conversation mode (will use template default if empty)
284+
prompt = params.system_prompt;
285+
286+
if (!prompt.empty()) {
287+
prompt = chat_add_and_format("system", prompt);
288+
}
289+
} else {
284290
// otherwise use the prompt as is
285-
: params.prompt;
291+
prompt = params.prompt;
292+
}
293+
286294
if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
287295
LOG_DBG("tokenize the prompt\n");
288296
embd_inp = common_tokenize(ctx, prompt, true, true);
@@ -296,7 +304,7 @@ int main(int argc, char ** argv) {
296304
}
297305

298306
// Should not run without any tokens
299-
if (embd_inp.empty()) {
307+
if (!params.conversation_mode && embd_inp.empty()) {
300308
if (add_bos) {
301309
embd_inp.push_back(llama_vocab_bos(vocab));
302310
LOG_WRN("embd_inp was considered empty and bos was added: %s\n", string_from(ctx, embd_inp).c_str());
@@ -777,7 +785,7 @@ int main(int argc, char ** argv) {
777785
}
778786

779787
// deal with end of generation tokens in interactive mode
780-
if (llama_vocab_is_eog(vocab, common_sampler_last(smpl))) {
788+
if (!waiting_for_first_input && llama_vocab_is_eog(vocab, common_sampler_last(smpl))) {
781789
LOG_DBG("found an EOG token\n");
782790

783791
if (params.interactive) {
@@ -797,12 +805,12 @@ int main(int argc, char ** argv) {
797805
}
798806

799807
// if current token is not EOG, we add it to current assistant message
800-
if (params.conversation_mode) {
808+
if (params.conversation_mode && !waiting_for_first_input) {
801809
const auto id = common_sampler_last(smpl);
802810
assistant_ss << common_token_to_piece(ctx, id, false);
803811
}
804812

805-
if (n_past > 0 && is_interacting) {
813+
if ((n_past > 0 || waiting_for_first_input) && is_interacting) {
806814
LOG_DBG("waiting for user input\n");
807815

808816
if (params.conversation_mode) {
@@ -892,11 +900,12 @@ int main(int argc, char ** argv) {
892900
input_echo = false; // do not echo this again
893901
}
894902

895-
if (n_past > 0) {
903+
if (n_past > 0 || waiting_for_first_input) {
896904
if (is_interacting) {
897905
common_sampler_reset(smpl);
898906
}
899907
is_interacting = false;
908+
waiting_for_first_input = false;
900909
}
901910
}
902911

0 commit comments

Comments (0)