From 3029d78fbf568d83ae25c11c99ab4c356a4dedb7 Mon Sep 17 00:00:00 2001 From: moeakwak Date: Mon, 2 Oct 2023 01:06:59 +0800 Subject: [PATCH] support multimodal_text content and attachments --- api/chatgpt/typings.go | 13 +++++++------ api/imitate/convert.go | 25 +++++++++++++++++++++++-- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git a/api/chatgpt/typings.go b/api/chatgpt/typings.go index 0d921a434..39ae391ed 100644 --- a/api/chatgpt/typings.go +++ b/api/chatgpt/typings.go @@ -27,14 +27,15 @@ func (c *CreateConversationRequest) AddMessage(role string, content string) { c.Messages = append(c.Messages, Message{ ID: uuid.New().String(), Author: Author{Role: role}, - Content: Content{ContentType: "text", Parts: []string{content}}, + Content: Content{ContentType: "text", Parts: []interface{}{content}}, }) } type Message struct { - Author Author `json:"author"` - Content Content `json:"content"` - ID string `json:"id"` + Author Author `json:"author"` + Content Content `json:"content"` + ID string `json:"id"` + Metadata interface{} `json:"metadata"` } type Author struct { @@ -42,8 +43,8 @@ type Author struct { } type Content struct { - ContentType string `json:"content_type"` - Parts []string `json:"parts"` + ContentType string `json:"content_type"` + Parts []interface{} `json:"parts"` } type CreateConversationResponse struct { diff --git a/api/imitate/convert.go b/api/imitate/convert.go index bac392d3d..33dbdd24d 100644 --- a/api/imitate/convert.go +++ b/api/imitate/convert.go @@ -1,15 +1,36 @@ package imitate import ( + "fmt" "strings" ) func ConvertToString(chatgptResponse *ChatGPTResponse, previousText *StringStruct, role bool, id string, model string) string { - text := strings.ReplaceAll(chatgptResponse.Message.Content.Parts[0], *&previousText.Text, "") + var text string + + if len(chatgptResponse.Message.Content.Parts) == 1 { + if part, ok := chatgptResponse.Message.Content.Parts[0].(string); ok { + text = strings.ReplaceAll(part, previousText.Text, "") + previousText.Text = part + } else { + text = fmt.Sprintf("%v", chatgptResponse.Message.Content.Parts[0]) + } + } else { + // When using GPT-4 messages with images (multimodal_text), the length of 'parts' might be 2. + // Since the chatgpt API currently does not support multimodal content + // and there is no official format for multimodal content, + // the content is temporarily returned as is. + var parts []string + for _, part := range chatgptResponse.Message.Content.Parts { + parts = append(parts, fmt.Sprintf("%v", part)) + } + text = strings.Join(parts, ", ") + } + translatedResponse := NewChatCompletionChunk(text, id, model) if role { translatedResponse.Choices[0].Delta.Role = chatgptResponse.Message.Author.Role } - previousText.Text = chatgptResponse.Message.Content.Parts[0] + return "data: " + translatedResponse.String() + "\n\n" }