Coverage for stackone_ai/meta_tools.py: 94%
114 statements
« prev ^ index » next coverage.py v7.11.0, created at 2025-12-24 09:48 +0000
« prev ^ index » next coverage.py v7.11.0, created at 2025-12-24 09:48 +0000
1"""Meta tools for dynamic tool discovery and execution"""
3from __future__ import annotations
5import json
6from typing import TYPE_CHECKING
8import bm25s
9import numpy as np
10from pydantic import BaseModel
12from stackone_ai.constants import DEFAULT_HYBRID_ALPHA
13from stackone_ai.models import ExecuteConfig, JsonDict, StackOneTool, ToolParameters
14from stackone_ai.utils.tfidf_index import TfidfDocument, TfidfIndex
16if TYPE_CHECKING:
17 from stackone_ai.models import Tools
class MetaToolSearchResult(BaseModel):
    """Result from meta_search_tools"""

    # Name of the matched tool, as registered in the search index.
    name: str
    # Description text of the matched tool.
    description: str
    # Fused relevance score the index computed for this tool.
    score: float
class ToolIndex:
    """Hybrid BM25 + TF-IDF tool search index.

    Each tool is indexed twice, once in a BM25 retriever and once in a TF-IDF
    index. ``search`` queries both and fuses the two scores linearly.
    """

    def __init__(self, tools: list[StackOneTool], hybrid_alpha: float | None = None) -> None:
        """Initialize tool index with hybrid search.

        Args:
            tools: List of tools to index.
            hybrid_alpha: Weight of the BM25 score in the fused score
                ``alpha * bm25 + (1 - alpha) * tfidf``. Values outside [0, 1]
                are clamped. If not provided, DEFAULT_HYBRID_ALPHA is used
                (documented as 0.2, i.e. BM25 weighted 0.2 and TF-IDF weighted
                0.8 under the formula above; reported as a 10.8% improvement
                in tool discovery accuracy in validation testing).
        """
        self.tools = tools
        # Later tools win when two tools share a name.
        self.tool_map = {tool.name: tool for tool in tools}

        # Use default if not provided, then clamp to [0, 1]
        alpha = DEFAULT_HYBRID_ALPHA if hybrid_alpha is None else hybrid_alpha
        self.hybrid_alpha = max(0.0, min(1.0, alpha))

        # Verbs recognised inside a tool name; loop-invariant, so built once.
        action_types = ("create", "update", "delete", "get", "list", "search")

        # One text document per tool for each of the two indexes.
        corpus: list[str] = []
        tfidf_docs = []
        # Position i holds the name of the document with BM25 index i.
        self.tool_names: list[str] = []

        for tool in tools:
            # Tool names are underscore separated; the first part is the category.
            parts = tool.name.split("_")
            category = parts[0] if parts else ""
            actions = " ".join(p for p in parts if p in action_types)
            name_parts = " ".join(parts)

            # For TF-IDF: weighted approach similar to Node.js (name repeated as a boost)
            tfidf_text = " ".join(
                [
                    f"{tool.name} {tool.name} {tool.name}",  # boost name
                    f"{category} {actions}",
                    tool.description,
                    name_parts,
                ]
            )

            # For BM25: simpler approach, every field once
            bm25_text = " ".join([tool.name, tool.description, category, name_parts, actions])

            corpus.append(bm25_text)
            tfidf_docs.append(TfidfDocument(id=tool.name, text=tfidf_text))
            self.tool_names.append(tool.name)

        # Create BM25 index
        self.bm25_retriever = bm25s.BM25()
        corpus_tokens = bm25s.tokenize(corpus, stemmer=None, show_progress=False)  # ty: ignore[invalid-argument-type]
        self.bm25_retriever.index(corpus_tokens)

        # Create TF-IDF index
        self.tfidf_index = TfidfIndex()
        self.tfidf_index.build(tfidf_docs)

    def search(self, query: str, limit: int = 5, min_score: float = 0.0) -> list[MetaToolSearchResult]:
        """Search for relevant tools using hybrid BM25 + TF-IDF.

        Args:
            query: Natural language query.
            limit: Maximum number of results. A value of zero or less yields
                an empty list.
            min_score: Minimum fused relevance score (0-1) a tool must reach.

        Returns:
            List of search results sorted by relevance, best first.
        """
        # Nothing can be returned for a non-positive limit or an empty index;
        # bail out before asking either retriever for k=0 candidates.
        if limit <= 0 or not self.tools:
            return []

        # Get more results initially to have a better candidate pool for fusion
        candidate_count = min(max(50, limit), len(self.tools))

        # Search with BM25
        query_tokens = bm25s.tokenize([query], stemmer=None, show_progress=False)  # ty: ignore[invalid-argument-type]
        bm25_results, bm25_scores = self.bm25_retriever.retrieve(query_tokens, k=candidate_count)

        # Search with TF-IDF
        tfidf_results = self.tfidf_index.search(query, k=candidate_count)

        # Per-tool partial scores, keyed by tool name then by scorer.
        score_map: dict[str, dict[str, float]] = {}

        # Only one query was submitted, so row 0 holds its hits.
        for idx, score in zip(bm25_results[0], bm25_scores[0]):
            # Squash the raw BM25 score through a logistic curve, then clamp to [0, 1]
            normalized_score = float(1 / (1 + np.exp(-score / 10)))
            score_map[self.tool_names[idx]] = {"bm25": max(0.0, min(1.0, normalized_score))}

        for result in tfidf_results:
            score_map.setdefault(result.id, {})["tfidf"] = result.score

        # Fuse scores: hybrid_score = alpha * bm25 + (1 - alpha) * tfidf.
        # A scorer that did not return the tool contributes 0.
        alpha = self.hybrid_alpha
        fused_results: list[tuple[str, float]] = [
            (tool_name, alpha * scores.get("bm25", 0.0) + (1 - alpha) * scores.get("tfidf", 0.0))
            for tool_name, scores in score_map.items()
        ]

        # Sort by score descending
        fused_results.sort(key=lambda item: item[1], reverse=True)

        # Build final results
        search_results: list[MetaToolSearchResult] = []
        for tool_name, score in fused_results:
            if score < min_score:
                continue

            tool = self.tool_map.get(tool_name)
            if tool is None:
                continue

            search_results.append(
                MetaToolSearchResult(
                    name=tool.name,
                    description=tool.description,
                    score=score,
                )
            )
            if len(search_results) >= limit:
                break

        return search_results
def create_meta_search_tools(index: ToolIndex) -> StackOneTool:
    """Create the meta_search_tools tool.

    The returned tool performs no HTTP request: its ``execute`` runs a search
    against ``index`` and returns the matches.

    Args:
        index: Tool search index

    Returns:
        Meta tool for searching relevant tools
    """
    name = "meta_search_tools"
    description = (
        f"Searches for relevant tools based on a natural language query using hybrid BM25 + TF-IDF search "
        f"(alpha={index.hybrid_alpha}). This tool should be called first to discover available tools "
        f"before executing them."
    )

    parameters = ToolParameters(
        type="object",
        properties={
            "query": {
                "type": "string",
                "description": (
                    "Natural language query describing what tools you need "
                    '(e.g., "tools for managing employees", "create time off request")'
                ),
            },
            "limit": {
                "type": "number",
                "description": "Maximum number of tools to return (default: 5)",
                "default": 5,
            },
            "minScore": {
                "type": "number",
                "description": "Minimum relevance score (0-1) to filter results (default: 0.0)",
                "default": 0.0,
            },
        },
    )

    def execute_filter(arguments: str | JsonDict | None = None) -> JsonDict:
        """Run the search described by ``arguments`` and return ``{"tools": [...]}``.

        ``arguments`` may be a JSON string, a dict, or ``None``.
        """
        # Parse arguments
        if isinstance(arguments, str):
            # A blank string carries no arguments; json.loads("") would raise.
            kwargs = json.loads(arguments) if arguments.strip() else {}
        else:
            kwargs = arguments
        # Covers None, an empty dict and a JSON "null" payload.
        kwargs = kwargs or {}

        # An explicit null is treated like an omitted key, so int(None) /
        # float(None) can no longer raise TypeError.
        raw_limit = kwargs.get("limit")
        raw_min_score = kwargs.get("minScore")
        query = kwargs.get("query") or ""
        limit = 5 if raw_limit is None else int(raw_limit)
        min_score = 0.0 if raw_min_score is None else float(raw_min_score)

        # Search for tools
        results = index.search(query, limit, min_score)

        # Format results as plain dicts
        return {
            "tools": [
                {
                    "name": hit.name,
                    "description": hit.description,
                    "score": hit.score,
                }
                for hit in results
            ]
        }

    # Create execute config for the meta tool
    execute_config = ExecuteConfig(
        name=name,
        method="POST",
        url="",  # Meta tools don't make HTTP requests
        headers={},
    )

    # Create a wrapper class that delegates execute to our custom function
    class MetaSearchTool(StackOneTool):
        """Meta tool for searching relevant tools"""

        def __init__(self) -> None:
            super().__init__(
                description=description,
                parameters=parameters,
                _execute_config=execute_config,
                _api_key="",  # Meta tools don't need API key
                _account_id=None,
            )

        def execute(
            self, arguments: str | JsonDict | None = None, *, options: JsonDict | None = None
        ) -> JsonDict:
            # ``options`` is accepted but not used by the search.
            return execute_filter(arguments)

    return MetaSearchTool()
def create_meta_execute_tool(tools_collection: Tools) -> StackOneTool:
    """Create the meta_execute_tool.

    The returned tool performs no HTTP request itself: its ``execute`` looks up
    the requested tool in ``tools_collection`` and delegates to it.

    Args:
        tools_collection: Collection of tools to execute from

    Returns:
        Meta tool for executing discovered tools
    """
    name = "meta_execute_tool"
    description = (
        "Executes a tool by name with the provided parameters. "
        "Use this after discovering tools with meta_search_tools."
    )

    parameters = ToolParameters(
        type="object",
        properties={
            "toolName": {
                "type": "string",
                "description": "Name of the tool to execute",
            },
            "params": {
                "type": "object",
                "description": "Parameters to pass to the tool",
                "additionalProperties": True,
            },
        },
    )

    def execute_tool(arguments: str | JsonDict | None = None) -> JsonDict:
        """Execute the tool named by ``toolName`` with ``params``.

        ``arguments`` may be a JSON string, a dict, or ``None``.

        Raises:
            ValueError: If ``toolName`` is missing or empty, or if no tool with
                that name exists in the collection.
        """
        # Parse arguments
        if isinstance(arguments, str):
            # A blank string carries no arguments; json.loads("") would raise.
            kwargs = json.loads(arguments) if arguments.strip() else {}
        else:
            kwargs = arguments
        # Covers None, an empty dict and a JSON "null" payload, so a missing
        # toolName is reported by the ValueError below.
        kwargs = kwargs or {}

        tool_name = kwargs.get("toolName")
        params = kwargs.get("params", {})

        if not tool_name:
            raise ValueError("toolName is required")

        # Get the tool
        tool = tools_collection.get_tool(tool_name)
        if not tool:
            raise ValueError(f"Tool '{tool_name}' not found")

        # Execute the tool
        return tool.execute(params)

    # Create execute config for the meta tool
    execute_config = ExecuteConfig(
        name=name,
        method="POST",
        url="",  # Meta tools don't make HTTP requests
        headers={},
    )

    # Create a wrapper class that delegates execute to our custom function
    class MetaExecuteTool(StackOneTool):
        """Meta tool for executing discovered tools"""

        def __init__(self) -> None:
            super().__init__(
                description=description,
                parameters=parameters,
                _execute_config=execute_config,
                _api_key="",  # Meta tools don't need API key
                _account_id=None,
            )

        def execute(
            self, arguments: str | JsonDict | None = None, *, options: JsonDict | None = None
        ) -> JsonDict:
            # ``options`` is accepted but not forwarded to the delegated tool.
            return execute_tool(arguments)

    return MetaExecuteTool()