From 87d33a8d1d6aa5c9e92e2584162394f810251a10 Mon Sep 17 00:00:00 2001 From: Laura Date: Sun, 31 Aug 2025 00:25:03 +0200 Subject: [PATCH] much improved search --- exa.go | 122 +++++++++++++++++++++++++++++---------------- internal/tools.txt | 14 +++++- search.go | 69 +++++++++++++++++++------ 3 files changed, 145 insertions(+), 60 deletions(-) diff --git a/exa.go b/exa.go index 5f1e94d..a95789d 100644 --- a/exa.go +++ b/exa.go @@ -7,15 +7,17 @@ import ( "fmt" "net/http" "strings" + "time" ) type ExaResult struct { - Title string `json:"title"` - URL string `json:"url"` - PublishedDate string `json:"publishedDate"` - - Text string `json:"text"` - Summary string `json:"summary"` + Title string `json:"title"` + URL string `json:"url"` + PublishedDate string `json:"publishedDate,omitempty"` + SiteName string `json:"siteName,omitempty"` + Summary string `json:"summary,omitempty"` + Highlights []string `json:"highlights,omitempty"` + Text string `json:"text,omitempty"` } type ExaCost struct { @@ -23,43 +25,20 @@ type ExaCost struct { } type ExaResults struct { - RequestID string `json:"requestId"` - Results []ExaResult `json:"results"` - Cost ExaCost `json:"costDollars"` -} - -func (e *ExaResult) String() string { - var ( - label string - text string - ) - - if e.Text != "" { - label = "Text" - text = e.Text - } else if e.Summary != "" { - label = "Summary" - text = e.Summary - } - - return fmt.Sprintf( - "Title: %s \nURL: %s \nPublished Date: %s \n%s: %s", - e.Title, - e.URL, - e.PublishedDate, - label, - strings.TrimSpace(text), - ) + RequestID string `json:"requestId"` + SearchType string `json:"resolvedSearchType"` + Results []ExaResult `json:"results"` + Cost ExaCost `json:"costDollars"` } func (e *ExaResults) String() string { - list := make([]string, len(e.Results)) + var builder strings.Builder - for i, result := range e.Results { - list[i] = result.String() - } + json.NewEncoder(&builder).Encode(map[string]any{ + "results": e.Results, + }) - return strings.Join(list, "\n\n---\n\n") + return builder.String() } func NewExaRequest(ctx context.Context, path string, data any) (*http.Request, error) { @@ -100,15 +79,62 @@ func RunExaRequest(req *http.Request) (*ExaResults, error) { } func ExaRunSearch(ctx context.Context, args SearchWebArguments) (*ExaResults, error) { + if args.NumResults <= 0 { + args.NumResults = 6 + } else if args.NumResults < 3 { + args.NumResults = 3 + } else if args.NumResults >= 12 { + args.NumResults = 12 + } + data := map[string]any{ "query": args.Query, "type": "auto", "numResults": args.NumResults, - "contents": map[string]any{ - "summary": map[string]any{ - "query": "Summarize this page only with all information directly relevant to answering the user's question: include key facts, numbers, dates, names, definitions, steps, code or commands, and the page's stance or conclusion; omit fluff and unrelated sections.", - }, + } + + if len(args.Domains) > 0 { + data["includeDomains"] = args.Domains + } + + contents := map[string]any{ + "summary": map[string]any{}, + "highlights": map[string]any{ + "numSentences": 2, + "highlightsPerUrl": 3, }, + "livecrawl": "preferred", + } + + switch args.Intent { + case "news": + data["category"] = "news" + data["numResults"] = max(8, args.NumResults) + data["startPublishedDate"] = daysAgo(30) + case "docs": + contents["subpages"] = 1 + contents["subpageTarget"] = []string{"documentation", "changelog", "release notes"} + case "papers": + data["category"] = "research paper" + data["startPublishedDate"] = daysAgo(365 * 2) + case 
"code": + data["category"] = "github" + + contents["subpages"] = 1 + contents["subpageTarget"] = []string{"readme", "changelog", "code"} + case "deep_read": + contents["text"] = map[string]any{ + "maxCharacters": 8000, + } + } + + data["contents"] = contents + + switch args.Recency { + case "month": + data["startPublishedDate"] = daysAgo(30) + case "year": + data["startPublishedDate"] = daysAgo(356) } req, err := NewExaRequest(ctx, "/search", data) @@ -121,10 +147,16 @@ func ExaRunSearch(ctx context.Context, args SearchWebArguments) (*ExaResults, er func ExaRunContents(ctx context.Context, args FetchContentsArguments) (*ExaResults, error) { data := map[string]any{ - "urls": args.URLs, + "urls": args.URLs, + "summary": map[string]any{}, + "highlights": map[string]any{ + "numSentences": 2, + "highlightsPerUrl": 3, + }, "text": map[string]any{ "maxCharacters": 8000, }, + "livecrawl": "preferred", } req, err := NewExaRequest(ctx, "/contents", data) @@ -134,3 +166,7 @@ func ExaRunContents(ctx context.Context, args FetchContentsArguments) (*ExaResul return RunExaRequest(req) } + +func daysAgo(days int) string { + return time.Now().Add(time.Duration(days) * 24 * time.Hour).Format(time.DateOnly) +} diff --git a/internal/tools.txt b/internal/tools.txt index 0bb39aa..ed19d90 100644 --- a/internal/tools.txt +++ b/internal/tools.txt @@ -1,3 +1,13 @@ -You have access to web search tools. Use `search_web` with `query` (string) and `num_results` (1-10) to find current information - when searching for recent/latest information, always include specific dates or years (e.g., "august 2025"). Use `fetch_contents` with `urls` (array) to read full page content from search results or known URLs. Use `github_repository` with `owner` (string) and `repo` (string) to get repository overviews (info, branches, files, README) without cloning. Formulate specific, targeted queries and provide all required parameters. Call only one tool per response. +# Tool use +Use at most 1 tool call per turn. You have %d turns with tool calls total. -You have %d tool calls available in total. \ No newline at end of file +search_web({query, num_results?, intent?, recency?, domains?}) +- Fresh info & citations. Keep query short; add month/year if freshness matters. +- intent: auto|news|docs|papers|code|deep_read (deep_read may include full text). +- num_results: default 6 (3-12); recency: auto|month|year. + +fetch_contents({urls}) +- Read 1-5 given URLs for exact content/quotes/numbers. + +github_repository({owner,repo}) +- Quick repo overview + README excerpt. \ No newline at end of file diff --git a/search.go b/search.go index 1f15ca6..6b198eb 100644 --- a/search.go +++ b/search.go @@ -11,8 +11,11 @@ import ( ) type SearchWebArguments struct { - Query string `json:"query"` - NumResults int `json:"num_results"` + Query string `json:"query"` + NumResults int `json:"num_results,omitempty"` + Intent string `json:"intent,omitempty"` + Recency string `json:"recency,omitempty"` + Domains []string `json:"domains,omitempty"` } type FetchContentsArguments struct { @@ -30,40 +33,60 @@ func GetSearchTools() []openrouter.Tool { Type: openrouter.ToolTypeFunction, Function: &openrouter.FunctionDefinition{ Name: "search_web", - Description: "Search the web via Exa in auto mode. 
Returns up to 10 results with short summaries.",
+				Description: "Search the live web (via Exa /search) and return summaries, highlights, and optionally full text for the top results.",
 				Parameters: map[string]any{
 					"type":     "object",
-					"required": []string{"query", "num_results"},
+					"required": []string{"query"},
 					"properties": map[string]any{
 						"query": map[string]any{
 							"type":        "string",
-							"description": "A concise, specific search query in natural language.",
+							"description": "A concise, specific search query in natural language. Include month/year if recency matters (e.g., 'august 2025').",
 						},
 						"num_results": map[string]any{
 							"type":        "integer",
-							"description": "Number of results to return (3-10). Default to 6.",
+							"description": "Number of results to return (3-12). Default is 6.",
 							"minimum":     3,
-							"maximum":     10,
+							"maximum":     12,
 						},
+						"intent": map[string]any{
+							"type":        "string",
+							"enum":        []string{"auto", "news", "docs", "papers", "code", "deep_read"},
+							"description": "Search profile. Use 'news' for breaking topics, 'docs' for official docs/changelogs, 'papers' for research, 'code' for repos, 'deep_read' when you need exact quotes/numbers (adds full text). Default 'auto'.",
+						},
+						"recency": map[string]any{
+							"type":        "string",
+							"enum":        []string{"auto", "month", "year"},
+							"description": "Time filter hint. 'month' ~ last 30 days, 'year' ~ last 365 days. Default 'auto'.",
+						},
+						"domains": map[string]any{
+							"type": "array",
+							"items": map[string]any{
+								"type": "string",
+							},
+							"description": "Restrict to these domains (e.g., ['europa.eu', 'who.int']).",
+						},
 					},
 					"additionalProperties": false,
 				},
-				Strict: true,
 			},
 		},
 		{
 			Type: openrouter.ToolTypeFunction,
 			Function: &openrouter.FunctionDefinition{
 				Name:        "fetch_contents",
-				Description: "Fetch page contents for one or more URLs via Exa /contents.",
+				Description: "Fetch and summarize page contents for one or more URLs (via Exa /contents). 
Use when the user provides specific links.", Parameters: map[string]any{ "type": "object", "required": []string{"urls"}, "properties": map[string]any{ "urls": map[string]any{ "type": "array", - "description": "List of URLs (1..N) to fetch.", - "items": map[string]any{"type": "string"}, + "description": "List of URLs to fetch.", + "items": map[string]any{ + "type": "string", + }, + "minItems": 1, + "maxItems": 5, }, }, "additionalProperties": false, @@ -75,14 +98,14 @@ func GetSearchTools() []openrouter.Tool { Type: openrouter.ToolTypeFunction, Function: &openrouter.FunctionDefinition{ Name: "github_repository", - Description: "Get a quick overview of a GitHub repository without cloning: repo info, up to 20 branches (popular first), top-level files/dirs, and the README.", + Description: "Fetch repository metadata and README from GitHub.", Parameters: map[string]any{ "type": "object", "required": []string{"owner", "repo"}, "properties": map[string]any{ "owner": map[string]any{ "type": "string", - "description": "GitHub username or organization (e.g., 'torvalds').", + "description": "Repository owner (e.g., 'torvalds').", }, "repo": map[string]any{ "type": "string", @@ -100,7 +123,7 @@ func GetSearchTools() []openrouter.Tool { func HandleSearchWebTool(ctx context.Context, tool *ToolCall) error { var arguments SearchWebArguments - err := json.Unmarshal([]byte(tool.Args), &arguments) + err := ParseAndUpdateArgs(tool, &arguments) if err != nil { return err } @@ -132,7 +155,7 @@ func HandleSearchWebTool(ctx context.Context, tool *ToolCall) error { func HandleFetchContentsTool(ctx context.Context, tool *ToolCall) error { var arguments FetchContentsArguments - err := json.Unmarshal([]byte(tool.Args), &arguments) + err := ParseAndUpdateArgs(tool, &arguments) if err != nil { return err } @@ -164,7 +187,7 @@ func HandleFetchContentsTool(ctx context.Context, tool *ToolCall) error { func HandleGitHubRepositoryTool(ctx context.Context, tool *ToolCall) error { var arguments GitHubRepositoryArguments - err := json.Unmarshal([]byte(tool.Args), &arguments) + err := ParseAndUpdateArgs(tool, &arguments) if err != nil { return err } @@ -180,3 +203,19 @@ func HandleGitHubRepositoryTool(ctx context.Context, tool *ToolCall) error { return nil } + +func ParseAndUpdateArgs(tool *ToolCall, arguments any) error { + err := json.Unmarshal([]byte(tool.Args), arguments) + if err != nil { + return err + } + + b, err := json.Marshal(arguments) + if err != nil { + return err + } + + tool.Args = string(b) + + return nil +}
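
A minimal, hypothetical usage sketch (not part of the patch) showing how the new SearchWebArguments fields added above could be exercised through ExaRunSearch. It assumes the "context" and "fmt" imports plus whatever Exa API-key configuration NewExaRequest already relies on, since that setup is outside this diff; the query and domain values are examples only.

	// Sketch only: run a news-profile search restricted to two example domains.
	func exampleNewsSearch(ctx context.Context) error {
		args := SearchWebArguments{
			Query:      "EU AI Act implementation timeline august 2025", // example query
			NumResults: 8,
			Intent:     "news",  // news profile: category "news", at least 8 results, last 30 days
			Recency:    "month", // redundant with the "news" profile here; shown for illustration
			Domains:    []string{"europa.eu", "reuters.com"},
		}

		results, err := ExaRunSearch(ctx, args)
		if err != nil {
			return err
		}

		// ExaResults.String returns the results encoded as JSON.
		fmt.Println(results.String())
		return nil
	}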