Skip to content

Commit

Permalink
merge
Browse files Browse the repository at this point in the history
Signed-off-by: Dave Lee <[email protected]>
  • Loading branch information
dave-gray101 committed Nov 2, 2024
2 parents 61b2a69 + 65c3df3 commit 5a31160
Show file tree
Hide file tree
Showing 9 changed files with 225 additions and 6 deletions.
4 changes: 2 additions & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@ DETECT_LIBS?=true
# llama.cpp versions
GOLLAMA_REPO?=https://github.com/go-skynet/go-llama.cpp
GOLLAMA_VERSION?=2b57a8ae43e4699d3dc5d1496a1ccd42922993be
CPPLLAMA_VERSION?=ab3d71f97f5b2915a229099777af00d3eada1d24
CPPLLAMA_VERSION?=418f5eef262cea07c2af4f45ee6a88d882221fcb

# go-rwkv version
RWKV_REPO?=https://github.com/donomii/go-rwkv.cpp
RWKV_VERSION?=661e7ae26d442f5cfebd2a0881b44e8c55949ec6

# whisper.cpp version
WHISPER_REPO?=https://github.com/ggerganov/whisper.cpp
WHISPER_CPP_VERSION?=aa037a60f32018f32e54be3531ec6cc7802899eb
WHISPER_CPP_VERSION?=0377596b77a3602e36430320cbe45f8c305ef04a

# bert.cpp version
BERT_REPO?=https://github.com/go-skynet/go-bert.cpp
Expand Down
13 changes: 11 additions & 2 deletions core/http/endpoints/localai/tts.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,13 +9,15 @@ import (
"github.com/gofiber/fiber/v2"
"github.com/mudler/LocalAI/core/schema"
"github.com/rs/zerolog/log"

"github.com/mudler/LocalAI/pkg/utils"
)

// TTSEndpoint is the OpenAI Speech API endpoint https://platform.openai.com/docs/api-reference/audio/createSpeech
//
// @Summary Generates audio from the input text.
// @Accept json
// @Produce audio/x-wav
// @Accept json
// @Produce audio/x-wav
// @Param request body schema.TTSRequest true "query params"
// @Success 200 {string} binary "generated audio/wav file"
// @Router /v1/audio/speech [post]
Expand Down Expand Up @@ -55,6 +57,13 @@ func TTSEndpoint(cl *config.BackendConfigLoader, ml *model.ModelLoader, appConfi
if err != nil {
return err
}

// Convert generated file to target format
filePath, err = utils.AudioConvert(filePath, input.Format)
if err != nil {
return err
}

return c.Download(filePath)
}
}
1 change: 1 addition & 0 deletions core/schema/localai.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ type TTSRequest struct {
Voice string `json:"voice" yaml:"voice"` // voice audio file or speaker id
Backend string `json:"backend" yaml:"backend"`
Language string `json:"language,omitempty" yaml:"language,omitempty"` // (optional) language to use with TTS model
Format string `json:"response_format,omitempty" yaml:"response_format,omitempty"` // (optional) output format
}

type StoresSet struct {
Expand Down
133 changes: 133 additions & 0 deletions docs/layouts/partials/docs/top-header.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
<!-- Top Header -->
<div id="top-header" class="top-header d-print-none">
<div class="header-bar d-flex justify-content-between">
<div class="d-flex align-items-center">
<a href='{{ with .Site.Params.docs.logoLinkURL }}{{ . }}{{ else }}{{ relLangURL "" }}{{ end }}' class="logo-icon me-3" aria-label="HomePage" alt="HomePage">
<div class="small">
{{ with resources.Get "images/logos/mark.svg" }}
{{ .Content | safeHTML }}
{{ end }}
</div>
<div class="big">
{{ with resources.Get "images/logos/logo.svg" }}
{{ .Content | safeHTML }}
{{ end }}
</div>
</a>
<button id="close-sidebar" class="btn btn-icon btn-soft">
<span class="material-icons size-20 menu-icon align-middle">menu</span>
</button>
{{ if and (.Site.Params.docsearch.appID) (.Site.Params.docsearch.apiKey) -}}
<span><div id="docsearch"></div></span>
{{ end }}
{{ if or (not (isset .Site.Params.flexsearch "enabled")) (eq .Site.Params.flexsearch.enabled true) -}}
{{ if and (.Site.Params.docsearch.appID) (.Site.Params.docsearch.apiKey) -}}
{{ else }}
<!-- <form class="flexsearch position-relative flex-grow-1 ms-2 me-lg-2 d-none">
<input id="flexsearch" class="form-control is-search" type="search" placeholder="{{ i18n "search_title" }}" aria-label="{{ i18n "search_title" }}" autocomplete="off">
<div id="suggestions" class="shadow bg-white rounded d-none"></div>
</form> -->
<button id="flexsearch-button" class="ms-3 btn btn-soft" data-bs-toggle="collapse" data-bs-target="#FlexSearchCollapse" aria-expanded="false" aria-controls="FlexSearchCollapse">
<span class="material-icons size-20 menu-icon align-middle">search</span>
<span class="flexsearch-button-placeholder ms-1 me-2 d-none d-sm-block">{{ i18n "search_title" }}</span>
<div class="d-none d-sm-block">
<span class="flexsearch-button-keys">
<kbd class="flexsearch-button-cmd-key">
<svg width="44" height="15"><path d="M2.118,11.5A1.519,1.519,0,0,1,1,11.042,1.583,1.583,0,0,1,1,8.815a1.519,1.519,0,0,1,1.113-.458h.715V6.643H2.118A1.519,1.519,0,0,1,1,6.185,1.519,1.519,0,0,1,.547,5.071,1.519,1.519,0,0,1,1,3.958,1.519,1.519,0,0,1,2.118,3.5a1.519,1.519,0,0,1,1.114.458A1.519,1.519,0,0,1,3.69,5.071v.715H5.4V5.071A1.564,1.564,0,0,1,6.976,3.5,1.564,1.564,0,0,1,8.547,5.071,1.564,1.564,0,0,1,6.976,6.643H6.261V8.357h.715a1.575,1.575,0,0,1,1.113,2.685,1.583,1.583,0,0,1-2.227,0A1.519,1.519,0,0,1,5.4,9.929V9.214H3.69v.715a1.519,1.519,0,0,1-.458,1.113A1.519,1.519,0,0,1,2.118,11.5Zm0-.857a.714.714,0,0,0,.715-.714V9.214H2.118a.715.715,0,1,0,0,1.429Zm4.858,0a.715.715,0,1,0,0-1.429H6.261v.715a.714.714,0,0,0,.715.714ZM3.69,8.357H5.4V6.643H3.69ZM2.118,5.786h.715V5.071a.714.714,0,0,0-.715-.714.715.715,0,0,0-.5,1.22A.686.686,0,0,0,2.118,5.786Zm4.143,0h.715a.715.715,0,0,0,.5-1.22.715.715,0,0,0-1.22.5Z" fill="currentColor"></path><path d="M12.4,11.475H11.344l3.879-7.95h1.056Z" fill="currentColor"></path><path d="M25.073,5.384l-.864.576a2.121,2.121,0,0,0-1.786-.923,2.207,2.207,0,0,0-2.266,2.326,2.206,2.206,0,0,0,2.266,2.325,2.1,2.1,0,0,0,1.782-.918l.84.617a3.108,3.108,0,0,1-2.622,1.293,3.217,3.217,0,0,1-3.349-3.317,3.217,3.217,0,0,1,3.349-3.317A3.046,3.046,0,0,1,25.073,5.384Z" fill="currentColor"></path><path d="M30.993,5.142h-2.07v5.419H27.891V5.142h-2.07V4.164h5.172Z" fill="currentColor"></path><path d="M34.67,4.164c1.471,0,2.266.658,2.266,1.851,0,1.087-.832,1.809-2.134,1.855l2.107,2.691h-1.28L33.591,7.87H33.07v2.691H32.038v-6.4Zm-1.6.969v1.8h1.572c.832,0,1.22-.3,1.22-.918s-.411-.882-1.22-.882Z" fill="currentColor"></path><path d="M42.883,10.561H38.31v-6.4h1.033V9.583h3.54Z" fill="currentColor"></path></svg>
</kbd>
<kbd class="flexsearch-button-key">
<svg width="15" height="15"><path d="M5.926,12.279H4.41L9.073,2.721H10.59Z" fill="currentColor"/></svg>
</kbd>
</span>
</div>
</button>
{{ end }}
{{ end -}}
</div>
<div class="d-flex align-items-center m-1">
<h5>Star us on GitHub !&nbsp;</h5>
<script async defer src="https://buttons.github.io/buttons.js"></script>
<a class="github-button" href="https://github.com/mudler/LocalAI" data-color-scheme="no-preference: light; light: light; dark: dark;" data-icon="octicon-star" data-size="large" data-show-count="true" aria-label="Star mudler/LocalAI on GitHub">Star</a>
</div>
<div class="d-flex align-items-center">
<ul class="list-unstyled mb-0">
{{ with $.Scratch.Get "social_list" }}
{{ range . }}
{{ $path := printf "images/social/%s.%s" . "svg" }}
<li class="list-inline-item mb-0">
<a href="{{ if eq . `rss` }} {{ `index.xml` | absURL }} {{ else }} https://{{ . }}.com/{{ index site.Params.social . }} {{ end }}" alt="{{ . }}" rel="noopener noreferrer" target="_blank">
<div class="btn btn-icon btn-default border-0">
{{ with resources.Get $path }}
{{ .Content | safeHTML }}
{{ end }}
</div>
</a>
</li>
{{ end }}
{{ end }}
</ul>
{{ if eq .Site.Params.docs.darkMode true -}}
<button id="mode" class="btn btn-icon btn-default ms-2" type="button" aria-label="Toggle user interface mode">
<span class="toggle-dark">
<svg xmlns="http://www.w3.org/2000/svg" height="30" width="30" viewBox="0 0 48 48" fill="currentColor">
<title>{{ i18n "enable_dark_mode" | default "Enable dark mode" }}</title>
<path d="M24 42q-7.5 0-12.75-5.25T6 24q0-7.5 5.25-12.75T24 6q.4 0 .85.025.45.025 1.15.075-1.8 1.6-2.8 3.95-1 2.35-1 4.95 0 4.5 3.15 7.65Q28.5 25.8 33 25.8q2.6 0 4.95-.925T41.9 22.3q.05.6.075.975Q42 23.65 42 24q0 7.5-5.25 12.75T24 42Zm0-3q5.45 0 9.5-3.375t5.05-7.925q-1.25.55-2.675.825Q34.45 28.8 33 28.8q-5.75 0-9.775-4.025T19.2 15q0-1.2.25-2.575.25-1.375.9-3.125-4.9 1.35-8.125 5.475Q9 18.9 9 24q0 6.25 4.375 10.625T24 39Zm-.2-14.85Z"/>
</svg>
</span>
<span class="toggle-light">
<svg xmlns="http://www.w3.org/2000/svg" height="30" width="30" viewBox="0 0 48 48" fill="currentColor">
<title>{{ i18n "enable_light_mode" | default "Enable light mode" }}</title>
<path d="M24 31q2.9 0 4.95-2.05Q31 26.9 31 24q0-2.9-2.05-4.95Q26.9 17 24 17q-2.9 0-4.95 2.05Q17 21.1 17 24q0 2.9 2.05 4.95Q21.1 31 24 31Zm0 3q-4.15 0-7.075-2.925T14 24q0-4.15 2.925-7.075T24 14q4.15 0 7.075 2.925T34 24q0 4.15-2.925 7.075T24 34ZM3.5 25.5q-.65 0-1.075-.425Q2 24.65 2 24q0-.65.425-1.075Q2.85 22.5 3.5 22.5h5q.65 0 1.075.425Q10 23.35 10 24q0 .65-.425 1.075-.425.425-1.075.425Zm36 0q-.65 0-1.075-.425Q38 24.65 38 24q0-.65.425-1.075.425-.425 1.075-.425h5q.65 0 1.075.425Q46 23.35 46 24q0 .65-.425 1.075-.425.425-1.075.425ZM24 10q-.65 0-1.075-.425Q22.5 9.15 22.5 8.5v-5q0-.65.425-1.075Q23.35 2 24 2q.65 0 1.075.425.425.425.425 1.075v5q0 .65-.425 1.075Q24.65 10 24 10Zm0 36q-.65 0-1.075-.425-.425-.425-.425-1.075v-5q0-.65.425-1.075Q23.35 38 24 38q.65 0 1.075.425.425.425.425 1.075v5q0 .65-.425 1.075Q24.65 46 24 46ZM12 14.1l-2.85-2.8q-.45-.45-.425-1.075.025-.625.425-1.075.45-.45 1.075-.45t1.075.45L14.1 12q.4.45.4 1.05 0 .6-.4 1-.4.45-1.025.45-.625 0-1.075-.4Zm24.7 24.75L33.9 36q-.4-.45-.4-1.075t.45-1.025q.4-.45 1-.45t1.05.45l2.85 2.8q.45.45.425 1.075-.025.625-.425 1.075-.45.45-1.075.45t-1.075-.45ZM33.9 14.1q-.45-.45-.45-1.05 0-.6.45-1.05l2.8-2.85q.45-.45 1.075-.425.625.025 1.075.425.45.45.45 1.075t-.45 1.075L36 14.1q-.4.4-1.025.4-.625 0-1.075-.4ZM9.15 38.85q-.45-.45-.45-1.075t.45-1.075L12 33.9q.45-.45 1.05-.45.6 0 1.05.45.45.45.45 1.05 0 .6-.45 1.05l-2.8 2.85q-.45.45-1.075.425-.625-.025-1.075-.425ZM24 24Z"/>
</svg>
</span>
</button>
{{ end -}}
{{ if .Site.IsMultiLingual }}
<div class="dropdown">
<button class="btn btn-link btn-default dropdown-toggle ps-2" type="button" data-bs-toggle="dropdown" aria-expanded="false">
{{ site.Language.Lang | upper }}
</button>
<ul class="dropdown-menu text-end">
{{ partial (printf "%s/%s" ($.Scratch.Get "pathName") "i18nlist") . }}
</ul>
</div>
{{ end }}
</div>
</div>
<!-- FlexSearch Input Start -->
{{ if or (not (isset .Site.Params.flexsearch "enabled")) (eq .Site.Params.flexsearch.enabled true) -}}
{{ if and (.Site.Params.docsearch.appID) (.Site.Params.docsearch.apiKey) -}}
{{ else }}
<div class="collapse" id="FlexSearchCollapse">
<div class="flexsearch-container">
<div class="flexsearch-keymap">
<li>
<kbd class="flexsearch-button-cmd-key"><svg width="15" height="15" aria-label="Arrow down" role="img"><g fill="none" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.2"><path d="M7.5 3.5v8M10.5 8.5l-3 3-3-3"></path></g></svg></kbd>
<kbd class="flexsearch-button-cmd-key"><svg width="15" height="15" aria-label="Arrow up" role="img"><g fill="none" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.2"><path d="M7.5 11.5v-8M10.5 6.5l-3-3-3 3"></path></g></svg></kbd>
<span class="flexsearch-key-label">{{ i18n "search_navigate" | default "to navigate" }}</span>
</li>
<li>
<kbd class="flexsearch-button-cmd-key"><svg width="15" height="15" aria-label="Enter key" role="img"><g fill="none" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.2"><path d="M12 3.53088v3c0 1-1 2-2 2H4M7 11.53088l-3-3 3-3"></path></g></svg></kbd>
<span class="flexsearch-key-label">{{ i18n "search_select" | default "to select" }}</span>
</li>
<li>
<kbd class="flexsearch-button-cmd-key"><svg width="15" height="15" aria-label="Escape key" role="img"><g fill="none" stroke="currentColor" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.2"><path d="M13.6167 8.936c-.1065.3583-.6883.962-1.4875.962-.7993 0-1.653-.9165-1.653-2.1258v-.5678c0-1.2548.7896-2.1016 1.653-2.1016.8634 0 1.3601.4778 1.4875 1.0724M9 6c-.1352-.4735-.7506-.9219-1.46-.8972-.7092.0246-1.344.57-1.344 1.2166s.4198.8812 1.3445.9805C8.465 7.3992 8.968 7.9337 9 8.5c.032.5663-.454 1.398-1.4595 1.398C6.6593 9.898 6 9 5.963 8.4851m-1.4748.5368c-.2635.5941-.8099.876-1.5443.876s-1.7073-.6248-1.7073-2.204v-.4603c0-1.0416.721-2.131 1.7073-2.131.9864 0 1.6425 1.031 1.5443 2.2492h-2.956"></path></g></svg></kbd>
<span class="flexsearch-key-label">{{ i18n "search_close" | default "to close" }}</span>
</li>
</div>
<form class="flexsearch position-relative flex-grow-1 ms-2 me-2">
<div class="d-flex flex-row">
<input id="flexsearch" class="form-control" type="search" placeholder="{{ i18n "search_title" }}" aria-label="{{ i18n "search_title" }}" autocomplete="off">
<button id="hideFlexsearch" type="button" class="ms-2 btn btn-soft">
{{ i18n "search_cancel" | default "cancel" }}
</button>
</div>
<div id="suggestions" class="shadow rounded-1 d-none"></div>
</form>
</div>
</div>
{{ end }}
{{ end }}
<!-- FlexSearch Input End -->
</div>
<!-- Top Header -->
35 changes: 35 additions & 0 deletions gallery/index.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -861,6 +861,23 @@
- filename: SmolLM-1.7B-Instruct.Q4_K_M.gguf
sha256: 2b07eb2293ed3fc544a9858beda5bfb03dcabda6aa6582d3c85768c95f498d28
uri: huggingface://MaziyarPanahi/SmolLM-1.7B-Instruct-GGUF/SmolLM-1.7B-Instruct.Q4_K_M.gguf
- !!merge <<: *smollm
name: "smollm2-1.7b-instruct"
icon: https://cdn-uploads.huggingface.co/production/uploads/61c141342aac764ce1654e43/y45hIMNREW7w_XpHYB_0q.png
urls:
- https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct
- https://huggingface.co/HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF
description: |
SmolLM2 is a family of compact language models available in three size: 135M, 360M, and 1.7B parameters. They are capable of solving a wide range of tasks while being lightweight enough to run on-device.
The 1.7B variant demonstrates significant advances over its predecessor SmolLM1-1.7B, particularly in instruction following, knowledge, reasoning, and mathematics. It was trained on 11 trillion tokens using a diverse dataset combination: FineWeb-Edu, DCLM, The Stack, along with new mathematics and coding datasets that we curated and will release soon. We developed the instruct version through supervised fine-tuning (SFT) using a combination of public datasets and our own curated datasets. We then applied Direct Preference Optimization (DPO) using UltraFeedback.
overrides:
parameters:
model: smollm2-1.7b-instruct-q4_k_m.gguf
files:
- filename: smollm2-1.7b-instruct-q4_k_m.gguf
sha256: decd2598bc2c8ed08c19adc3c8fdd461ee19ed5708679d1c54ef54a5a30d4f33
uri: huggingface://HuggingFaceTB/SmolLM2-1.7B-Instruct-GGUF/smollm2-1.7b-instruct-q4_k_m.gguf
- &llama31
## LLama3.1
url: "github:mudler/LocalAI/gallery/llama3.1-instruct.yaml@master"
Expand Down Expand Up @@ -2710,6 +2727,24 @@
- filename: Darkens-8B.Q4_K_M.gguf
sha256: f56a483e10fd00957460adfc16ee462cecac892a4fb44dc59e466e68a360fd42
uri: huggingface://QuantFactory/Darkens-8B-GGUF/Darkens-8B.Q4_K_M.gguf
- !!merge <<: *mistral03
url: "github:mudler/LocalAI/gallery/chatml.yaml@master"
name: "starcannon-unleashed-12b-v1.0"
icon: https://cdn-uploads.huggingface.co/production/uploads/6720ed503a24966ac66495e8/HXc0AxPLkoIC1fy0Pb3Pb.png
urls:
- https://huggingface.co/VongolaChouko/Starcannon-Unleashed-12B-v1.0
- https://huggingface.co/QuantFactory/Starcannon-Unleashed-12B-v1.0-GGUF
description: |
This is a merge of pre-trained language models created using mergekit.
MarinaraSpaghetti_NemoMix-Unleashed-12B
Nothingiisreal_MN-12B-Starcannon-v3
overrides:
parameters:
model: Starcannon-Unleashed-12B-v1.0.Q4_K_M.gguf
files:
- filename: Starcannon-Unleashed-12B-v1.0.Q4_K_M.gguf
sha256: b32c6582d75d2f1d67d567badc691a1338dd1a016c71efbfaf4c91812f398f0e
uri: huggingface://QuantFactory/Starcannon-Unleashed-12B-v1.0-GGUF/Starcannon-Unleashed-12B-v1.0.Q4_K_M.gguf
- &mudler
### START mudler's LocalAI specific-models
url: "github:mudler/LocalAI/gallery/mudler.yaml@master"
Expand Down
30 changes: 30 additions & 0 deletions pkg/utils/ffmpeg.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"fmt"
"os"
"os/exec"
"strings"
)

func ffmpegCommand(args []string) (string, error) {
Expand All @@ -23,3 +24,32 @@ func AudioToWav(src, dst string) error {
}
return nil
}

// AudioConvert converts generated wav file from tts to other output formats.
// TODO: handle pcm to have 100% parity of supported format from OpenAI
func AudioConvert(src string, format string) (string, error) {
extension := ""
// compute file extension from format, default to wav
switch format {
case "opus":
extension = ".ogg"
case "mp3", "aac", "flac":
extension = fmt.Sprintf(".%s", format)
default:
extension = ".wav"
}

// if .wav, do nothing
if extension == ".wav" {
return src, nil
}

// naive conversion based on default values and target extension of file
dst := strings.Replace(src, ".wav", extension, -1)
commandArgs := []string{"-y", "-i", src, "-vn", dst}
out, err := ffmpegCommand(commandArgs)
if err != nil {
return "", fmt.Errorf("error: %w out: %s", err, out)
}
return dst, nil
}
6 changes: 5 additions & 1 deletion swagger/docs.go
Original file line number Diff line number Diff line change
Expand Up @@ -1721,7 +1721,11 @@ const docTemplate = `{
"voice": {
"description": "voice audio file or speaker id",
"type": "string"
}
},
"response_format": {
"description": "(optional) output format of generated audio file, defaults to wav, accept wav, mp3, flac, aac, opus",
"type": "string"
},
}
},
"schema.ToolCall": {
Expand Down
6 changes: 5 additions & 1 deletion swagger/swagger.json
Original file line number Diff line number Diff line change
Expand Up @@ -1714,6 +1714,10 @@
"voice": {
"description": "voice audio file or speaker id",
"type": "string"
},
"response_format": {
"description": "(optional) output format of generated audio file, defaults to wav, accept wav, mp3, flac, aac, opus",
"type": "string"
}
}
},
Expand Down Expand Up @@ -1742,4 +1746,4 @@
"in": "header"
}
}
}
}
Loading

0 comments on commit 5a31160

Please sign in to comment.