Skip to content

Commit 0e74db7

Browse files
committed
Fixed another TTS bug, CLBlast backend selection, and quiet mode
1 parent 1cb9805 commit 0e74db7

File tree

4 files changed

+49
-45
lines changed

4 files changed

+49
-45
lines changed

gpttype_adapter.cpp

+29-29
Original file line number | Diff line number | Diff line change
@@ -106,6 +106,7 @@ static kcpp_params * kcpp_data = nullptr;
106106
static int max_context_limit_at_load = 0;
107107
static int n_past = 0;
108108
static int debugmode = 0; //-1 = hide all, 0 = normal, 1 = showall
109+
static bool quiet = false;
109110
static std::vector<gpt_vocab::id> last_n_tokens;
110111
static std::vector<gpt_vocab::id> current_context_tokens;
111112
static size_t mem_per_token = 0;
@@ -930,12 +931,12 @@ void sample_xtc(llama_token_data_array * candidates, float xtc_threshold, float
930931

931932
if(last_idx>1) //if there are 2 or more viable candidates
932933
{
933-
if (debugmode==1) {
934+
if (debugmode==1 && !quiet) {
934935
printf("XTC penalties [");
935936
}
936937
// then remove all other tokens above threshold EXCEPT the least likely one
937938
for (size_t i = 0; i < last_idx - 1; ++i) {
938-
if (debugmode==1)
939+
if (debugmode==1 && !quiet)
939940
{
940941
gpt_vocab::id token = candidates->data[i].id;
941942
std::string tokenizedstr = FileFormatTokenizeID(token, file_format);
@@ -944,7 +945,7 @@ void sample_xtc(llama_token_data_array * candidates, float xtc_threshold, float
944945
}
945946
candidates->data[i].logit -= 999.0f; //infinity gets wonky results downstream, this hack works well enough
946947
}
947-
if (debugmode==1) {
948+
if (debugmode==1 && !quiet) {
948949
printf("]\n");
949950
}
950951
candidates->sorted = false;
@@ -1133,7 +1134,7 @@ void sample_dry(int n_ctx, int penalty_range, float penalty_multiplier, float pe
11331134
max_exponent = FLOAT_MAX_LOG / std::log(penalty_base);
11341135
}
11351136

1136-
if (debugmode==1 && !dry_max_token_repeat.empty()) {
1137+
if (debugmode==1 && !quiet && !dry_max_token_repeat.empty()) {
11371138
printf("DRY penalties [");
11381139
}
11391140
size_t count = 0;
@@ -1144,7 +1145,7 @@ void sample_dry(int n_ctx, int penalty_range, float penalty_multiplier, float pe
11441145
repeat_exp = max_exponent;
11451146
}
11461147
float penalty = penalty_multiplier * pow(penalty_base, repeat_exp);
1147-
if (debugmode==1)
1148+
if (debugmode==1 && !quiet)
11481149
{
11491150
std::string tokenizedstr = FileFormatTokenizeID(token, file_format);
11501151
::utreplace(tokenizedstr, "\n", "\\n");
@@ -1157,7 +1158,7 @@ void sample_dry(int n_ctx, int penalty_range, float penalty_multiplier, float pe
11571158
{
11581159
candidates->sorted = false;
11591160
}
1160-
if (debugmode==1 && !dry_max_token_repeat.empty()) {
1161+
if (debugmode==1 && !quiet && !dry_max_token_repeat.empty()) {
11611162
printf("]\n");
11621163
}
11631164
}
@@ -1688,7 +1689,7 @@ static void load_grammar(const std::string & gammarstr)
16881689
printf("\nIgnored invalid grammar sampler.");
16891690
return;
16901691
}
1691-
if(debugmode==1)
1692+
if(debugmode==1 && !quiet)
16921693
{
16931694
parsed_grammar.print(stderr);
16941695
}
@@ -1831,7 +1832,7 @@ static float CalcGradientAIRopeFreqBase(float original_rope_base, int n_ctx_trai
18311832
float chi_ctx_value = (n_ctx_desired * ctx_multiplier) / 6.28318;
18321833
float gradient_ai_rope_freq_base_value = powf(original_rope_base, log10f(chi_ctx_value) / log10f(chi_ctx_train_value));
18331834

1834-
if(debugmode==1)
1835+
if(debugmode==1 && !quiet)
18351836
{
18361837
printf("Trained max context length (value:%.d).\n", n_ctx_train);
18371838
printf("Desired context length (value:%.d).\n", n_ctx_desired);
@@ -1848,7 +1849,7 @@ static float CalcGradientAIRopeFreqBase(float original_rope_base, int n_ctx_trai
18481849
{
18491850
float extended_rope_positive_offset_value = 1 + ((log10f(chi_ctx_value) - log10f(chi_ctx_train_value)) / ((log10f(chi_ctx_value) * log10f(chi_ctx_train_value)) - (log10f(chi_ctx_value) + log10f(chi_ctx_train_value))));
18501851
float rope_freq_base_with_positive_offset = gradient_ai_rope_freq_base_value * extended_rope_positive_offset_value;
1851-
if(debugmode==1)
1852+
if(debugmode==1 && !quiet)
18521853
{
18531854
printf("Extended RoPE Positive Offset (multiplicator) for Solar based models. (value:%.3f).\n", extended_rope_positive_offset_value);
18541855
printf("RoPE base calculated via Gradient AI formula for Solar based models. (value:%.1f).\n", rope_freq_base_with_positive_offset);
@@ -2679,13 +2680,13 @@ std::vector<int> gpttype_get_token_arr(const std::string & input, bool addbos)
26792680
printf("\nWarning: KCPP text generation not initialized!\n");
26802681
return toks;
26812682
}
2682-
if(debugmode==1)
2683+
if(debugmode==1 && !quiet)
26832684
{
26842685
printf("\nFileFormat: %d, Tokenizing: %s",file_format ,input.c_str());
26852686
}
26862687
TokenizeString(input, toks, file_format,addbos);
26872688
int tokcount = toks.size();
2688-
if(debugmode==1)
2689+
if(debugmode==1 && !quiet)
26892690
{
26902691
printf("\nTokens Counted: %d\n",tokcount);
26912692
}
@@ -2770,6 +2771,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
27702771
llama_perf_context_reset(llama_ctx_v4);
27712772
}
27722773

2774+
quiet = inputs.quiet;
27732775
generation_finished = false; // Set current generation status
27742776
generated_tokens.clear(); // New Generation, new tokens
27752777
delayed_generated_tokens.clear();
@@ -2848,7 +2850,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
28482850
banned_token_ids.clear();
28492851
if(banned_tokens.size()>0)
28502852
{
2851-
if(debugmode==1)
2853+
if(debugmode==1 && !quiet)
28522854
{
28532855
printf("\nBanning %zu single character sequences...",banned_tokens.size());
28542856
}
@@ -2865,13 +2867,13 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
28652867
}
28662868
}
28672869
}
2868-
if(debugmode==1)
2870+
if(debugmode==1 && !quiet)
28692871
{
28702872
printf("\nBanned a total of %zu individual tokens.\n",banned_token_ids.size());
28712873
}
28722874
}
28732875

2874-
if(debugmode==1 && banned_phrases.size()>0)
2876+
if(debugmode==1 && !quiet && banned_phrases.size()>0)
28752877
{
28762878
printf("\nBanned a total of %zu phrases, with max token count of %d.\n",banned_phrases.size(),delayed_generated_tokens_limit);
28772879
}
@@ -2916,7 +2918,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
29162918
//images have changed. swap identifiers to force reprocessing
29172919
current_llava_identifier = (current_llava_identifier==LLAVA_TOKEN_IDENTIFIER_A?LLAVA_TOKEN_IDENTIFIER_B:LLAVA_TOKEN_IDENTIFIER_A);
29182920
llava_composite_image_signature = new_llava_composite;
2919-
if(debugmode==1)
2921+
if(debugmode==1 && !quiet)
29202922
{
29212923
printf("\nLLAVA images changed, existing cache invalidated");
29222924
}
@@ -2972,7 +2974,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
29722974
const int MAX_CHAR_LEN = 40;
29732975
const int MAX_SEQ_LEN = 20;
29742976

2975-
if (debugmode == 1)
2977+
if (debugmode == 1 && !quiet)
29762978
{
29772979
printf("\nProcessing %zu dry break strings...", kcpp_data->dry_sequence_breakers.size());
29782980
}
@@ -2984,7 +2986,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
29842986
}
29852987
GetOverlappingTokenSequences(sequence_break, dry_sequence_breakers, MAX_SEQ_LEN);
29862988
}
2987-
if (debugmode == 1)
2989+
if (debugmode == 1 && !quiet)
29882990
{
29892991
int trivial = 0, non_trivial = 0;
29902992
for (const auto &seq : dry_sequence_breakers)
@@ -3004,9 +3006,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
30043006
}
30053007

30063008
bool stream_sse = inputs.stream_sse;
3007-
3008-
bool allow_regular_prints = (debugmode!=-1 && !inputs.quiet) || debugmode >= 1;
3009-
3009+
bool allow_regular_prints = (!quiet && debugmode!=-1);
30103010

30113011
std::string grammarstr = inputs.grammar;
30123012
bool grammar_retain_state = inputs.grammar_retain_state;
@@ -3039,7 +3039,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
30393039
if (kcpp_data->seed <= 0 || kcpp_data->seed==0xFFFFFFFF)
30403040
{
30413041
kcpp_data->seed = (((uint32_t)time(NULL)) % 1000000u);
3042-
if(debugmode==1)
3042+
if(debugmode==1 && !quiet)
30433043
{
30443044
printf("\nUsing Seed: %d",kcpp_data->seed);
30453045
}
@@ -3071,15 +3071,15 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
30713071
}
30723072
else
30733073
{
3074-
if(debugmode==1)
3074+
if(debugmode==1 && !quiet)
30753075
{
30763076
printf("\nCreating clip image embed...");
30773077
}
30783078
llava_images[i].clp_image_tokens = 0;
30793079
if (!llava_image_embed_make_with_clip_img(clp_ctx, kcpp_data->n_threads, clp_img_data, &llava_images[i].clp_img_embd, &llava_images[i].clp_image_tokens)) {
30803080
printf("\nError: Clip image %d failed to create embd!",i);
30813081
}
3082-
if(debugmode==1)
3082+
if(debugmode==1 && !quiet)
30833083
{
30843084
printf("\nLLAVA Clip Embed %i used Tokens: %d",i,llava_images[i].clp_image_tokens);
30853085
}
@@ -3202,7 +3202,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
32023202
std::fill(last_n_tokens.begin(), last_n_tokens.end(), 0);
32033203
n_past = 0;
32043204

3205-
if (debugmode==1)
3205+
if (debugmode==1 && !quiet)
32063206
{
32073207
std::string outstr = "";
32083208
printf("\n\n[Debug: Dump Raw Input Tokens, format: %d]\n", file_format);
@@ -3347,7 +3347,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
33473347
printf("\n");
33483348
}
33493349

3350-
if (debugmode==1)
3350+
if (debugmode==1 && !quiet)
33513351
{
33523352
std::string outstr = "";
33533353
printf("\n[Debug: Dump Forwarded Input Tokens, format: %d]\n", file_format);
@@ -3396,7 +3396,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
33963396
draft_used = true;
33973397
draft_results = speculative_decoding_eval_chunk(draft_ctx, llama_ctx_v4, embd, n_vocab, n_past);
33983398
evalres = draft_results.draft_success;
3399-
if(debugmode==1)
3399+
if(debugmode==1 && !quiet)
34003400
{
34013401
std::string draftedtoks = get_tok_vec_str(draft_results.draftids);
34023402
printf("\nDrafted %d Tokens: [%s]\n",speculative_chunk_amt,draftedtoks.c_str());
@@ -3599,7 +3599,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
35993599
if(draft_used)
36003600
{
36013601
int32_t draftedid = draft_results.draftids[logits_sampled];
3602-
if(debugmode==1)
3602+
if(debugmode==1 && !quiet)
36033603
{
36043604
std::string drafttok = FileFormatTokenizeID(draftedid, file_format, true);
36053605
std::string realtok = FileFormatTokenizeID(id, file_format, true);
@@ -3652,7 +3652,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
36523652
{
36533653
printf("\rGenerating (%d / %d tokens)", (kcpp_data->n_predict - remaining_tokens), kcpp_data->n_predict);
36543654
}
3655-
if(debugmode==1 && top_picks_history.size()>0)
3655+
if(debugmode==1 && !quiet && top_picks_history.size()>0)
36563656
{
36573657
printf(" [");
36583658
bool firstloop = true;
@@ -3904,7 +3904,7 @@ generation_outputs gpttype_generate(const generation_inputs inputs)
39043904
delayed_generated_tokens.pop_front();
39053905
}
39063906

3907-
if(debugmode==1 && file_format == FileFormat::GGUF_GENERIC)
3907+
if(debugmode==1 && !quiet && file_format == FileFormat::GGUF_GENERIC)
39083908
{
39093909
printf("\n");
39103910
llama_perf_context_print(llama_ctx_v4);

koboldcpp.py

+4-2
Original file line number | Diff line number | Diff line change
@@ -59,7 +59,7 @@
5959
modelbusy = threading.Lock()
6060
requestsinqueue = 0
6161
defaultport = 5001
62-
KcppVersion = "1.82.2"
62+
KcppVersion = "1.82.3"
6363
showdebug = True
6464
guimode = False
6565
showsamplerwarning = True
@@ -388,7 +388,7 @@ def pick_existant_file(ntoption,nonntoption):
388388
(lib_vulkan_noavx2, "Use Vulkan (Old CPU)"),
389389
(lib_clblast_noavx2, "Use CLBlast (Older CPU)"),
390390
(lib_failsafe, "Failsafe Mode (Older CPU)")]
391-
default_option, clblast_option, cublas_option, hipblas_option, vulkan_option, noavx2_option, clblast_noavx2_option, vulkan_noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs)
391+
default_option, clblast_option, cublas_option, hipblas_option, vulkan_option, noavx2_option, vulkan_noavx2_option, clblast_noavx2_option, failsafe_option = (opt if file_exists(lib) or (os.name == 'nt' and file_exists(opt + ".dll")) else None for lib, opt in lib_option_pairs)
392392
runopts = [opt for lib, opt in lib_option_pairs if file_exists(lib)]
393393

394394
def init_library():
@@ -596,6 +596,8 @@ def exit_with_error(code, message, title="Error"):
596596
sys.exit(code)
597597

598598
def utfprint(str, importance = 2): #0 = only debugmode, 1 = except quiet, 2 = always print
599+
if args.quiet and importance<2: #quiet overrides debugmode
600+
return
599601
if args.debugmode < 1:
600602
if importance==1 and (args.debugmode == -1 or args.quiet):
601603
return

otherarch/tts_adapter.cpp

+9-9
Original file line number | Diff line number | Diff line change
@@ -152,9 +152,10 @@ static std::vector<float> embd_to_audio(
152152
const int n_codes,
153153
const int n_embd,
154154
const int n_thread) {
155-
const int n_hop = 600;
156-
const int n_fft = n_hop*4; //its 1280 at 320, or 2400 at 600
157-
const int n_win = n_hop*4;
155+
156+
const int n_fft = 1280; //its 1280 at 320, or 2400 at 600
157+
const int n_hop = 320;
158+
const int n_win = 1280;
158159
const int n_pad = (n_win - n_hop)/2;
159160
const int n_out = (n_codes - 1)*n_hop + n_win;
160161

@@ -622,7 +623,7 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs)
622623
{
623624
audio_seed = (((uint32_t)time(NULL)) % 1000000u);
624625
}
625-
if(ttsdebugmode==1)
626+
if(ttsdebugmode==1 && !inputs.quiet)
626627
{
627628
printf("\nUsing Speaker Seed: %d", speaker_seed);
628629
printf("\nUsing Audio Seed: %d", audio_seed);
@@ -638,13 +639,12 @@ tts_generation_outputs ttstype_generate(const tts_generation_inputs inputs)
638639
&& last_generated_audio!=""
639640
&& last_generation_settings_prompt == std::string(inputs.prompt))
640641
{
641-
if(ttsdebugmode==1 || !inputs.quiet)
642-
{
642+
if (ttsdebugmode == 1 && !inputs.quiet) {
643643
printf("\nReusing Cached Audio.\n");
644-
output.data = last_generated_audio.c_str();
645-
output.status = 1;
646-
return output;
647644
}
645+
output.data = last_generated_audio.c_str();
646+
output.status = 1;
647+
return output;
648648
}
649649

650650

otherarch/whispercpp/whisper_adapter.cpp

+7-5
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
#endif
2525

2626
static int whisperdebugmode = 0;
27+
static bool whisperquiet = false;
2728
static whisper_context * whisper_ctx = nullptr;
2829
static std::string whisper_output_text = "";
2930

@@ -89,7 +90,7 @@ static bool read_wav(const std::string & b64data, std::vector<float>& pcmf32, st
8990
std::vector<float> raw_pcm;
9091
raw_pcm.resize(n);
9192

92-
if(whisperdebugmode==1)
93+
if(whisperdebugmode==1 && !whisperquiet)
9394
{
9495
printf("\nwav_data_size: %d, n:%d",wav_data.size(),n);
9596
}
@@ -106,7 +107,7 @@ static bool read_wav(const std::string & b64data, std::vector<float>& pcmf32, st
106107
}
107108

108109
if (wav.sampleRate != COMMON_SAMPLE_RATE) {
109-
if(whisperdebugmode==1)
110+
if(whisperdebugmode==1 && !whisperquiet)
110111
{
111112
printf("\nResample wav from %" PRIu32 " to %" PRIu32 " (in size: %zu)",
112113
wav.sampleRate, COMMON_SAMPLE_RATE, raw_pcm.size());
@@ -202,7 +203,8 @@ whisper_generation_outputs whispertype_generate(const whisper_generation_inputs
202203
return output;
203204
}
204205

205-
if(!inputs.quiet)
206+
whisperquiet = inputs.quiet;
207+
if(!whisperquiet)
206208
{
207209
printf("\nWhisper Transcribe Generating...");
208210
}
@@ -261,14 +263,14 @@ whisper_generation_outputs whispertype_generate(const whisper_generation_inputs
261263
return output;
262264
}
263265

264-
if (!inputs.quiet && whisperdebugmode==1) {
266+
if (!whisperquiet && whisperdebugmode==1) {
265267
whisper_print_timings(whisper_ctx);
266268
}
267269

268270
// output text transcription
269271
whisper_output_text = output_txt(whisper_ctx, pcmf32s);
270272
std::string ts = get_timestamp_str();
271-
if(!inputs.quiet)
273+
if(!whisperquiet)
272274
{
273275
printf("\n[%s] Whisper Transcribe Output: %s",ts.c_str(),whisper_output_text.c_str());
274276
} else {

0 commit comments

Comments
 (0)