Coverage for stackone_ai/meta_tools.py: 94%

114 statements  

« prev     ^ index     » next       coverage.py v7.11.0, created at 2025-12-24 09:48 +0000

1"""Meta tools for dynamic tool discovery and execution""" 

2 

3from __future__ import annotations 

4 

5import json 

6from typing import TYPE_CHECKING 

7 

8import bm25s 

9import numpy as np 

10from pydantic import BaseModel 

11 

12from stackone_ai.constants import DEFAULT_HYBRID_ALPHA 

13from stackone_ai.models import ExecuteConfig, JsonDict, StackOneTool, ToolParameters 

14from stackone_ai.utils.tfidf_index import TfidfDocument, TfidfIndex 

15 

16if TYPE_CHECKING: 

17 from stackone_ai.models import Tools 

18 

19 

class MetaToolSearchResult(BaseModel):
    """One ranked hit returned by a tool search.

    ``ToolIndex.search`` builds these from the matched tool and its fused
    relevance score; the ``meta_search_tools`` tool then serialises them.
    """

    # Name of the matched tool.
    name: str
    # Description of the matched tool.
    description: str
    # Fused (hybrid BM25 + TF-IDF) relevance score; higher is more relevant.
    score: float

26 

27 

class ToolIndex:
    """Hybrid BM25 + TF-IDF tool search index.

    Each tool is indexed twice (once in a BM25 retriever, once in a TF-IDF
    index). At query time both scores are combined as
    ``hybrid_alpha * bm25 + (1 - hybrid_alpha) * tfidf``.
    """

    # Verbs recognised inside a tool name; matching name segments are added
    # to the indexed text as extra "action" terms.
    _ACTION_TYPES = frozenset({"create", "update", "delete", "get", "list", "search"})

    def __init__(self, tools: list[StackOneTool], hybrid_alpha: float | None = None) -> None:
        """Initialize tool index with hybrid search

        Args:
            tools: List of tools to index
            hybrid_alpha: Weight for BM25 in hybrid search (0-1); TF-IDF is
                weighted by ``1 - hybrid_alpha``. Values outside [0, 1] are
                clamped. If not provided, uses DEFAULT_HYBRID_ALPHA
                (documented as 0.2), which weights TF-IDF more heavily than
                BM25 and was reported to provide better tool discovery
                accuracy (10.8% improvement in validation testing).
        """
        self.tools = tools
        self.tool_map = {tool.name: tool for tool in tools}
        # Use default if not provided, then clamp to [0, 1]
        alpha = hybrid_alpha if hybrid_alpha is not None else DEFAULT_HYBRID_ALPHA
        self.hybrid_alpha = max(0.0, min(1.0, alpha))

        # Prepare corpus for both BM25 and TF-IDF
        corpus = []
        tfidf_docs = []
        # Position i here corresponds to document i in the BM25 corpus, so
        # BM25 result indices can be mapped back to tool names.
        self.tool_names = []

        for tool in tools:
            # Extract category (first name segment) and action verbs from the tool name
            parts = tool.name.split("_")
            category = parts[0] if parts else ""
            actions = [p for p in parts if p in self._ACTION_TYPES]
            name_terms = " ".join(parts)
            action_terms = " ".join(actions)

            # For TF-IDF: weighted approach similar to Node.js (name repeated to boost it)
            tfidf_text = " ".join(
                [
                    f"{tool.name} {tool.name} {tool.name}",  # boost name
                    f"{category} {action_terms}",
                    tool.description,
                    name_terms,
                ]
            )

            # For BM25: simpler approach, every field contributes once
            bm25_text = " ".join(
                [
                    tool.name,
                    tool.description,
                    category,
                    name_terms,
                    action_terms,
                ]
            )

            corpus.append(bm25_text)
            tfidf_docs.append(TfidfDocument(id=tool.name, text=tfidf_text))
            self.tool_names.append(tool.name)

        # Create BM25 index
        self.bm25_retriever = bm25s.BM25()
        corpus_tokens = bm25s.tokenize(corpus, stemmer=None, show_progress=False)  # ty: ignore[invalid-argument-type]
        self.bm25_retriever.index(corpus_tokens)

        # Create TF-IDF index
        self.tfidf_index = TfidfIndex()
        self.tfidf_index.build(tfidf_docs)

    def search(self, query: str, limit: int = 5, min_score: float = 0.0) -> list[MetaToolSearchResult]:
        """Search for relevant tools using hybrid BM25 + TF-IDF

        Args:
            query: Natural language query
            limit: Maximum number of results; a value <= 0 yields no results
            min_score: Minimum fused relevance score (expected range 0-1);
                results scoring below it are dropped

        Returns:
            List of search results sorted by relevance (highest score first).
            Empty when the index holds no tools or ``limit`` is not positive.
        """
        # Nothing to return: without this guard a non-positive limit still
        # produced one result, and an empty index asked the retrievers for k=0.
        if limit <= 0 or not self.tools:
            return []

        # Get more results initially to have better candidate pool for fusion
        fetch_limit = max(50, limit)
        k = min(fetch_limit, len(self.tools))

        # Tokenize query for BM25
        query_tokens = bm25s.tokenize([query], stemmer=None, show_progress=False)  # ty: ignore[invalid-argument-type]

        # Search with BM25 and with TF-IDF
        bm25_results, bm25_scores = self.bm25_retriever.retrieve(query_tokens, k=k)
        tfidf_results = self.tfidf_index.search(query, k=k)

        # Build score map for fusion: tool name -> {"bm25": ..., "tfidf": ...}
        score_map: dict[str, dict[str, float]] = {}

        # Add BM25 scores (only one query was submitted, hence row 0)
        for idx, score in zip(bm25_results[0], bm25_scores[0]):
            tool_name = self.tool_names[idx]
            # Logistic squash of the raw BM25 score into the 0-1 range.
            # NOTE: a raw score of 0 maps to 0.5, not 0.
            normalized_score = float(1 / (1 + np.exp(-score / 10)))
            # Clamp to [0, 1]
            score_map[tool_name] = {"bm25": max(0.0, min(1.0, normalized_score))}

        # Add TF-IDF scores, creating an entry for tools BM25 did not return
        for result in tfidf_results:
            score_map.setdefault(result.id, {})["tfidf"] = result.score

        # Fuse scores: hybrid_score = alpha * bm25 + (1 - alpha) * tfidf
        alpha = self.hybrid_alpha
        fused_results: list[tuple[str, float]] = [
            (tool_name, alpha * scores.get("bm25", 0.0) + (1 - alpha) * scores.get("tfidf", 0.0))
            for tool_name, scores in score_map.items()
        ]

        # Sort by score descending
        fused_results.sort(key=lambda item: item[1], reverse=True)

        # Build final results
        search_results = []
        for tool_name, score in fused_results:
            if score < min_score:
                continue

            tool = self.tool_map.get(tool_name)
            if tool is None:
                # Defensive: a scored id with no matching tool is skipped
                continue

            search_results.append(
                MetaToolSearchResult(
                    name=tool.name,
                    description=tool.description,
                    score=score,
                )
            )

            if len(search_results) >= limit:
                break

        return search_results

172 

173 

def create_meta_search_tools(index: ToolIndex) -> StackOneTool:
    """Create the meta_search_tools tool

    The returned tool performs no HTTP request; its ``execute`` runs a
    search against ``index`` and returns the matches as plain dictionaries.

    Args:
        index: Tool search index

    Returns:
        Meta tool for searching relevant tools
    """
    name = "meta_search_tools"
    description = (
        "Searches for relevant tools based on a natural language query using hybrid BM25 + TF-IDF search "
        f"(alpha={index.hybrid_alpha}). This tool should be called first to discover available tools "
        "before executing them."
    )

    parameters = ToolParameters(
        type="object",
        properties={
            "query": {
                "type": "string",
                "description": (
                    "Natural language query describing what tools you need "
                    '(e.g., "tools for managing employees", "create time off request")'
                ),
            },
            "limit": {
                "type": "number",
                "description": "Maximum number of tools to return (default: 5)",
                "default": 5,
            },
            "minScore": {
                "type": "number",
                "description": "Minimum relevance score (0-1) to filter results (default: 0.0)",
                "default": 0.0,
            },
        },
    )

    def execute_filter(arguments: str | JsonDict | None = None) -> JsonDict:
        """Run the search described by ``arguments``.

        Args:
            arguments: A JSON string or an already-decoded mapping with the
                optional keys ``query``, ``limit`` and ``minScore``. ``None``
                (or JSON ``null``) is treated as an empty mapping.

        Returns:
            ``{"tools": [...]}`` where each entry has ``name``,
            ``description`` and ``score``.
        """
        # Parse arguments; a JSON string may decode to null, so normalise
        # that to an empty mapping exactly like a None argument.
        parsed = json.loads(arguments) if isinstance(arguments, str) else arguments
        kwargs = parsed or {}

        # Callers may send an explicit null for optional fields; fall back to
        # the documented defaults instead of failing in int()/float().
        raw_limit = kwargs.get("limit")
        raw_min_score = kwargs.get("minScore")
        query = kwargs.get("query") or ""
        limit = 5 if raw_limit is None else int(raw_limit)
        min_score = 0.0 if raw_min_score is None else float(raw_min_score)

        # Search for tools
        results = index.search(query, limit, min_score)

        # Format results
        tools_data = [
            {
                "name": r.name,
                "description": r.description,
                "score": r.score,
            }
            for r in results
        ]

        return {"tools": tools_data}

    # Create execute config for the meta tool
    execute_config = ExecuteConfig(
        name=name,
        method="POST",
        url="",  # Meta tools don't make HTTP requests
        headers={},
    )

    # Create a wrapper class that delegates execute to our custom function
    class MetaSearchTool(StackOneTool):
        """Meta tool for searching relevant tools"""

        def __init__(self) -> None:
            super().__init__(
                description=description,
                parameters=parameters,
                _execute_config=execute_config,
                _api_key="",  # Meta tools don't need API key
                _account_id=None,
            )

        def execute(
            self, arguments: str | JsonDict | None = None, *, options: JsonDict | None = None
        ) -> JsonDict:
            # ``options`` is accepted for signature compatibility but is not used.
            return execute_filter(arguments)

    return MetaSearchTool()

267 

268 

def create_meta_execute_tool(tools_collection: Tools) -> StackOneTool:
    """Create the meta_execute_tool

    The returned tool performs no HTTP request itself; its ``execute`` looks
    up the named tool in ``tools_collection`` and delegates to it.

    Args:
        tools_collection: Collection of tools to execute from

    Returns:
        Meta tool for executing discovered tools
    """
    name = "meta_execute_tool"
    description = (
        "Executes a tool by name with the provided parameters. "
        "Use this after discovering tools with meta_search_tools."
    )

    parameters = ToolParameters(
        type="object",
        properties={
            "toolName": {
                "type": "string",
                "description": "Name of the tool to execute",
            },
            "params": {
                "type": "object",
                "description": "Parameters to pass to the tool",
                "additionalProperties": True,
            },
        },
    )

    def execute_tool(arguments: str | JsonDict | None = None) -> JsonDict:
        """Look up ``toolName`` and execute it with ``params``.

        Args:
            arguments: A JSON string or an already-decoded mapping with the
                keys ``toolName`` (required) and ``params`` (optional).
                ``None`` (or JSON ``null``) is treated as an empty mapping.

        Returns:
            Whatever the target tool's ``execute`` returns.

        Raises:
            ValueError: If ``toolName`` is missing/empty or names no tool in
                the collection.
        """
        # Parse arguments; a JSON string may decode to null, so normalise
        # that to an empty mapping so the "toolName is required" error below
        # is raised instead of an AttributeError.
        parsed = json.loads(arguments) if isinstance(arguments, str) else arguments
        kwargs = parsed or {}

        tool_name = kwargs.get("toolName")
        params = kwargs.get("params", {})

        if not tool_name:
            raise ValueError("toolName is required")

        # Get the tool
        tool = tools_collection.get_tool(tool_name)
        if not tool:
            raise ValueError(f"Tool '{tool_name}' not found")

        # Execute the tool
        return tool.execute(params)

    # Create execute config for the meta tool
    execute_config = ExecuteConfig(
        name=name,
        method="POST",
        url="",  # Meta tools don't make HTTP requests
        headers={},
    )

    # Create a wrapper class that delegates execute to our custom function
    class MetaExecuteTool(StackOneTool):
        """Meta tool for executing discovered tools"""

        def __init__(self) -> None:
            super().__init__(
                description=description,
                parameters=parameters,
                _execute_config=execute_config,
                _api_key="",  # Meta tools don't need API key
                _account_id=None,
            )

        def execute(
            self, arguments: str | JsonDict | None = None, *, options: JsonDict | None = None
        ) -> JsonDict:
            # ``options`` is accepted for signature compatibility but is not
            # forwarded to the target tool.
            return execute_tool(arguments)

    return MetaExecuteTool()