From 75a9d893c3feb666bfba3bae8c4a2fb611af2295 Mon Sep 17 00:00:00 2001
From: Laura
Date: Fri, 15 Aug 2025 03:38:24 +0200
Subject: [PATCH] tweaks

---
Review notes (text between "---" and the first "diff --git" is not part of
the commit):
- chat.go: SplitImagePairs flushed the trailing text with len(text)-1, which
  dropped the last byte and panicked when a message ended with an image; the
  bound is now len(text). The image regexp is compiled once at package scope
  and doc comments were added.
- debug.go: doc comment added to dump.
- Hunk sizes and the diffstat below reflect these changes; the index hashes
  are still those of the original commit.

 README.md             |   1 +
 chat.go               | 107 ++++++++++++++++++++++++++++++++++++++++++-
 debug.go              |  16 +++++++
 models.go             |   3 ++
 static/js/markdown.js |   8 +++-
 5 files changed, 132 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index da871df..ccbf3db 100644
--- a/README.md
+++ b/README.md
@@ -67,6 +67,7 @@ go build -o whiskr
 - Adjust model, temperature, prompt, or message role from the controls in the bottom-left
 - Use the model search field to quickly find models (supports fuzzy matching)
 - Look for tags in the model list to see if a model supports tools, vision, or reasoning
+- Use `![alt](url)` in your message to display an image inline. If the model supports vision, the same image URL is passed to the model for multimodal input.
 
 ## License
 
diff --git a/chat.go b/chat.go
index 70e778b..5bf60aa 100644
--- a/chat.go
+++ b/chat.go
@@ -7,6 +7,7 @@ import (
 	"fmt"
 	"io"
 	"net/http"
+	"regexp"
 	"strings"
 
 	"github.com/revrost/go-openrouter"
@@ -117,13 +118,26 @@ func (r *Request) Parse() (*openrouter.ChatCompletionRequest, error) {
 
 	for index, message := range r.Messages {
 		switch message.Role {
-		case "system", "user":
+		case "system":
 			request.Messages = append(request.Messages, openrouter.ChatCompletionMessage{
 				Role: message.Role,
 				Content: openrouter.Content{
 					Text: message.Text,
 				},
 			})
+		case "user":
+			var content openrouter.Content
+
+			if model.Vision && strings.Contains(message.Text, "![") {
+				content.Multi = SplitImagePairs(message.Text)
+			} else {
+				content.Text = message.Text
+			}
+
+			request.Messages = append(request.Messages, openrouter.ChatCompletionMessage{
+				Role:    message.Role,
+				Content: content,
+			})
 		case "assistant":
 			msg := openrouter.ChatCompletionMessage{
 				Role: openrouter.ChatMessageRoleAssistant,
@@ -151,7 +165,7 @@ func (r *Request) Parse() (*openrouter.ChatCompletionRequest, error) {
 }
 
 func HandleChat(w http.ResponseWriter, r *http.Request) {
-	debug("new chat")
+	debug("parsing chat")
 
 	var raw Request
 
@@ -174,6 +188,9 @@ func HandleChat(w http.ResponseWriter, r *http.Request) {
 
 	request.Stream = true
 
+	dump("debug.json", request)
+	debug("preparing stream")
+
 	response, err := NewStream(w)
 	if err != nil {
 		RespondJson(w, http.StatusBadRequest, map[string]any{
@@ -325,3 +342,89 @@ func RunCompletion(ctx context.Context, response *Stream, request *openrouter.Ch
 
 	return tool, result.String(), nil
 }
+
+// markdownImageRgx matches markdown image syntax `![alt](url)`; its single
+// capture group is the URL. It is compiled once at package scope rather
+// than on every call.
+var markdownImageRgx = regexp.MustCompile(`(?m)!\[[^\]]*]\((\S+?)\)`)
+
+// SplitImagePairs splits text into an ordered list of message parts. Every
+// markdown image whose URL starts with "http://" or "https://" becomes an
+// image-URL part; all other content is kept as text. Consecutive text runs
+// are merged into one part, and image markup with any other URL scheme is
+// left in the text unchanged.
+func SplitImagePairs(text string) []openrouter.ChatMessagePart {
+	var (
+		index int
+		parts []openrouter.ChatMessagePart
+	)
+
+	// push appends text[str:end] as a text part, or extends the previous
+	// part when that one is already text. Empty ranges are skipped.
+	push := func(str, end int) {
+		rest := text[str:end]
+
+		if rest == "" {
+			return
+		}
+
+		total := len(parts)
+
+		if total > 0 && parts[total-1].Type == openrouter.ChatMessagePartTypeText {
+			parts[total-1].Text += rest
+
+			return
+		}
+
+		parts = append(parts, openrouter.ChatMessagePart{
+			Type: openrouter.ChatMessagePartTypeText,
+			Text: rest,
+		})
+	}
+
+	for {
+		location := markdownImageRgx.FindStringSubmatchIndex(text[index:])
+		if location == nil {
+			// No further images: flush the whole remaining tail. The end
+			// bound must be len(text); len(text)-1 would drop the last byte
+			// and panic when the text ends with an image.
+			push(index, len(text))
+
+			break
+		}
+
+		// Offsets are relative to text[index:], so shift them back.
+		start := index + location[0]
+		end := index + location[1]
+
+		urlStart := index + location[2]
+		urlEnd := index + location[3]
+
+		url := text[urlStart:urlEnd]
+
+		// Only http(s) URLs are sent as images; anything else stays text.
+		if !strings.HasPrefix(url, "https://") && !strings.HasPrefix(url, "http://") {
+			push(index, end)
+
+			index = end
+
+			continue
+		}
+
+		if start > index {
+			push(index, start)
+		}
+
+		parts = append(parts, openrouter.ChatMessagePart{
+			Type: openrouter.ChatMessagePartTypeImageURL,
+			ImageURL: &openrouter.ChatMessageImageURL{
+				Detail: openrouter.ImageURLDetailAuto,
+				URL:    url,
+			},
+		})
+
+		index = end
+	}
+
+	return parts
+}
diff --git a/debug.go b/debug.go
index c672f1b..7f3dd98 100644
--- a/debug.go
+++ b/debug.go
@@ -1,5 +1,21 @@
 package main
 
+import (
+	"encoding/json"
+	"os"
+)
+
+// dump writes val as tab-indented JSON to the file name when Debug is set.
+// It is best-effort: marshal and write errors are ignored.
+func dump(name string, val any) {
+	if !Debug {
+		return
+	}
+
+	b, _ := json.MarshalIndent(val, "", "\t")
+	os.WriteFile(name, b, 0644)
+}
+
 func debug(format string, args ...any) {
 	if !Debug {
 		return
diff --git a/models.go b/models.go
index 84411d8..c0e36ec 100644
--- a/models.go
+++ b/models.go
@@ -15,6 +15,7 @@ type Model struct {
 	Tags []string `json:"tags,omitempty"`
 
 	Reasoning bool `json:"-"`
+	Vision    bool `json:"-"`
 	JSON      bool `json:"-"`
 	Tools     bool `json:"-"`
 }
@@ -78,6 +79,8 @@ func GetModelTags(model openrouter.Model, m *Model) {
 
 	for _, modality := range model.Architecture.InputModalities {
 		if modality == "image" {
+			m.Vision = true
+
 			m.Tags = append(m.Tags, "vision")
 		}
 	}
diff --git a/static/js/markdown.js b/static/js/markdown.js
index 10b27d0..9fe8697 100644
--- a/static/js/markdown.js
+++ b/static/js/markdown.js
@@ -10,7 +10,13 @@
 		walkTokens: (token) => {
 			const { type, lang, text } = token;
 
-			if (type !== "code") {
+			if (type === "html") {
+				token.text = token.text.replace(/&/g, "&amp;")
+				token.text = token.text.replace(/</g, "&lt;")
+				token.text = token.text.replace(/>/g, "&gt;")
+
+				return;
+			} else if (type !== "code") {
 				return;
 			}
 