@@ -31,8 +31,6 @@
 #pragma warning(disable: 4244 4267) // possible loss of data
 #endif
 
-static const char * DEFAULT_SYSTEM_MESSAGE = "You are a helpful assistant";
-
 static llama_context ** g_ctx;
 static llama_model ** g_model;
 static common_sampler ** g_smpl;
@@ -267,6 +265,7 @@ int main(int argc, char ** argv) {
 
     std::vector<llama_token> embd_inp;
 
+    bool waiting_for_first_input = params.conversation_mode && params.enable_chat_template && params.system_prompt.empty();
     auto chat_add_and_format = [&chat_msgs, &chat_templates](const std::string & role, const std::string & content) {
         common_chat_msg new_msg;
         new_msg.role = role;
@@ -278,11 +277,20 @@ int main(int argc, char ** argv) {
     };
 
     {
-        auto prompt = (params.conversation_mode && params.enable_chat_template)
-            // format the system prompt in conversation mode (fallback to default if empty)
-            ? chat_add_and_format("system", params.system_prompt.empty() ? DEFAULT_SYSTEM_MESSAGE : params.system_prompt)
+        std::string prompt;
+
+        if (params.conversation_mode && params.enable_chat_template) {
+            // format the system prompt in conversation mode (will use template default if empty)
+            prompt = params.system_prompt;
+
+            if (!prompt.empty()) {
+                prompt = chat_add_and_format("system", prompt);
+            }
+        } else {
             // otherwise use the prompt as is
-            : params.prompt;
+            prompt = params.prompt;
+        }
+
         if (params.interactive_first || !params.prompt.empty() || session_tokens.empty()) {
             LOG_DBG("tokenize the prompt\n");
             embd_inp = common_tokenize(ctx, prompt, true, true);
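
The restructured block changes one observable behavior besides readability: an empty system prompt is no longer replaced by the hard-coded DEFAULT_SYSTEM_MESSAGE but left empty, deferring to whatever default the model's chat template defines. A minimal standalone sketch of the new selection logic, where pick_prompt and format_system are hypothetical stand-ins for the surrounding code and for chat_add_and_format():

    // Hypothetical, simplified model of the new prompt selection above.
    #include <iostream>
    #include <string>

    // Stand-in for chat_add_and_format("system", ...), which in main.cpp
    // renders the message through the model's chat template.
    static std::string format_system(const std::string & content) {
        return "<|system|>" + content + "<|end|>";
    }

    static std::string pick_prompt(bool conversation, bool chat_template,
                                   const std::string & system_prompt,
                                   const std::string & raw_prompt) {
        std::string prompt;
        if (conversation && chat_template) {
            // empty stays empty -> the template's own default applies
            prompt = system_prompt;
            if (!prompt.empty()) {
                prompt = format_system(prompt);
            }
        } else {
            prompt = raw_prompt; // non-conversation mode: use prompt as is
        }
        return prompt;
    }

    int main() {
        std::cout << pick_prompt(true,  true,  "",         "hi") << "\n"; // "" (template default)
        std::cout << pick_prompt(true,  true,  "Be terse", "hi") << "\n"; // formatted system msg
        std::cout << pick_prompt(false, false, "",         "hi") << "\n"; // "hi"
    }
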
@@ -296,7 +304,7 @@ int main(int argc, char ** argv) {
     }
 
     // Should not run without any tokens
-    if (embd_inp.empty()) {
+    if (!params.conversation_mode && embd_inp.empty()) {
         if (add_bos) {
             embd_inp.push_back(llama_vocab_bos(vocab));
             LOG_WRN("embd_inp was considered empty and bos was added: %s\n", string_from(ctx, embd_inp).c_str());
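
With the template now allowed to supply the system prompt, an empty embd_inp is a legitimate state in conversation mode, so the BOS fallback is restricted to the non-conversation path. A hypothetical illustration (the token id 1 is a placeholder, not a real vocab value):

    // Hypothetical: empty input is legal in conversation mode, so no BOS
    // token is injected there.
    #include <cstdio>
    #include <vector>

    int main() {
        std::vector<int> embd_inp;      // empty: no system prompt tokenized
        bool conversation_mode = true;
        const int bos = 1;              // placeholder BOS id

        if (!conversation_mode && embd_inp.empty()) {
            embd_inp.push_back(bos);    // only reached outside conversation mode
        }
        std::printf("tokens: %zu\n", embd_inp.size()); // prints 0
    }
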
@@ -777,7 +785,7 @@ int main(int argc, char ** argv) {
         }
 
         // deal with end of generation tokens in interactive mode
-        if (llama_vocab_is_eog(vocab, common_sampler_last(smpl))) {
+        if (!waiting_for_first_input && llama_vocab_is_eog(vocab, common_sampler_last(smpl))) {
             LOG_DBG("found an EOG token\n");
 
             if (params.interactive) {
@@ -797,12 +805,12 @@ int main(int argc, char ** argv) {
         }
 
         // if current token is not EOG, we add it to current assistant message
-        if (params.conversation_mode) {
+        if (params.conversation_mode && !waiting_for_first_input) {
             const auto id = common_sampler_last(smpl);
             assistant_ss << common_token_to_piece(ctx, id, false);
         }
 
-        if (n_past > 0 && is_interacting) {
+        if ((n_past > 0 || waiting_for_first_input) && is_interacting) {
             LOG_DBG("waiting for user input\n");
 
             if (params.conversation_mode) {
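
Both gates keep the first iteration inert while generation has not started: nothing has been sampled yet, so the sampler's last token should be neither checked for EOG nor appended to assistant_ss, and the third condition lets the loop reach the user-input path even though n_past is still 0. A condensed, hypothetical driver (is_eog and the token id 2 are placeholders for the real llama_vocab_is_eog() and vocab):

    // Hypothetical driver condensing the two gates above.
    #include <iostream>
    #include <sstream>

    int main() {
        bool waiting_for_first_input = true; // first turn, empty system prompt
        bool conversation_mode       = true;
        int  last_token              = 2;    // pretend 2 is the EOG id
        auto is_eog = [](int tok) { return tok == 2; };

        std::ostringstream assistant_ss;

        if (!waiting_for_first_input && is_eog(last_token)) {
            std::cout << "found an EOG token\n"; // skipped on the first turn
        }
        if (conversation_mode && !waiting_for_first_input) {
            assistant_ss << "<piece>"; // skipped: no assistant message exists yet
        }

        std::cout << "assistant buffer: '" << assistant_ss.str() << "'\n"; // prints ''
    }
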
@@ -892,11 +900,12 @@ int main(int argc, char ** argv) {
                 input_echo = false; // do not echo this again
             }
 
-            if (n_past > 0) {
+            if (n_past > 0 || waiting_for_first_input) {
                 if (is_interacting) {
                     common_sampler_reset(smpl);
                 }
                 is_interacting = false;
+                waiting_for_first_input = false;
             }
         }
 
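
Taken together, the flag has a simple lifecycle: set once at startup when conversation mode, chat templating, and an empty system prompt coincide; it stands in for n_past > 0 on the first pass so the loop drops straight into user input; and it is cleared here after that first interaction. A toy, self-contained walk-through under those assumptions (the real control flow lives in main.cpp's generation loop):

    // Hypothetical end-to-end simulation of the flag's lifecycle.
    #include <cstdio>

    int main() {
        // set at startup (mirrors the initialization in the earlier hunk)
        bool conversation_mode    = true;
        bool enable_chat_template = true;
        bool system_prompt_empty  = true;
        bool waiting_for_first_input =
            conversation_mode && enable_chat_template && system_prompt_empty;

        int  n_past         = 0;    // nothing decoded yet
        bool is_interacting = true; // conversation mode starts interactive

        // first pass: n_past == 0, but the flag still routes to user input
        if ((n_past > 0 || waiting_for_first_input) && is_interacting) {
            std::printf("waiting for user input\n");
        }

        // once input handling completes, the flag is cleared for good
        if (n_past > 0 || waiting_for_first_input) {
            is_interacting          = false;
            waiting_for_first_input = false;
        }

        std::printf("waiting_for_first_input = %d\n", waiting_for_first_input); // 0
    }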