Coverage for stackone_ai/meta_tools.py

1"""Meta tools for dynamic tool discovery and execution"""

3from __future__ import annotations

5import json

6from typing import TYPE_CHECKING

8import bm25s

9import numpy as np

10from pydantic import BaseModel

12from stackone_ai.constants import DEFAULT_HYBRID_ALPHA

13from stackone_ai.models import ExecuteConfig, JsonDict, StackOneTool, ToolParameters

14from stackone_ai.utils.tfidf_index import TfidfDocument, TfidfIndex

16if TYPE_CHECKING:

17 from stackone_ai.models import Tools

class MetaToolSearchResult(BaseModel):
    """Result from meta_search_tools"""

    # Name of the matched tool, as registered in the search index.
    name: str

    # Description text copied from the matched tool.
    description: str

    # Fused relevance score: alpha * BM25 + (1 - alpha) * TF-IDF.
    score: float

class ToolIndex:
    """Hybrid BM25 + TF-IDF tool search index.

    Every tool is indexed twice: once in a ``bm25s`` BM25 retriever and once
    in the project's ``TfidfIndex``. ``search`` queries both and fuses the two
    scores with a linear weighting controlled by ``hybrid_alpha``.
    """

    def __init__(self, tools: list[StackOneTool], hybrid_alpha: float | None = None) -> None:
        """Initialize tool index with hybrid search.

        Args:
            tools: List of tools to index.
            hybrid_alpha: Weight for BM25 in hybrid search (0-1); TF-IDF
                receives ``1 - hybrid_alpha``. Values outside [0, 1] are
                clamped. If not provided, DEFAULT_HYBRID_ALPHA is used
                (documented upstream as 0.2, which under the fusion formula
                weights TF-IDF more heavily than BM25; reported to improve
                tool discovery accuracy by 10.8% in validation testing).
        """
        self.tools = tools
        self.tool_map = {tool.name: tool for tool in tools}

        # Use default if not provided, then clamp to [0, 1]
        alpha = hybrid_alpha if hybrid_alpha is not None else DEFAULT_HYBRID_ALPHA
        self.hybrid_alpha = max(0.0, min(1.0, alpha))

        # Verbs recognised inside tool names; loop-invariant, so built once.
        action_types = ("create", "update", "delete", "get", "list", "search")

        # Parallel corpora for the two indexes. ``tool_names[i]`` is the tool
        # behind BM25 document ``i`` (BM25 reports positions, not ids).
        corpus: list[str] = []
        tfidf_docs: list[TfidfDocument] = []
        self.tool_names: list[str] = []

        for tool in tools:
            # Tool names are treated as "<category>_<word>_<word>...".
            # str.split always returns at least one element.
            parts = tool.name.split("_")
            category = parts[0]
            actions = [p for p in parts if p in action_types]

            # For TF-IDF: weighted approach similar to the Node.js
            # implementation; the name is repeated three times to boost it.
            tfidf_text = " ".join(
                [
                    f"{tool.name} {tool.name} {tool.name}",  # boost name
                    f"{category} {' '.join(actions)}",
                    tool.description,
                    " ".join(parts),
                ]
            )

            # For BM25: simpler, unweighted concatenation.
            bm25_text = " ".join(
                [
                    tool.name,
                    tool.description,
                    category,
                    " ".join(parts),
                    " ".join(actions),
                ]
            )

            corpus.append(bm25_text)
            tfidf_docs.append(TfidfDocument(id=tool.name, text=tfidf_text))
            self.tool_names.append(tool.name)

        # Create BM25 index
        self.bm25_retriever = bm25s.BM25()
        corpus_tokens = bm25s.tokenize(corpus, stemmer=None, show_progress=False)  # ty: ignore[invalid-argument-type]
        self.bm25_retriever.index(corpus_tokens)

        # Create TF-IDF index
        self.tfidf_index = TfidfIndex()
        self.tfidf_index.build(tfidf_docs)

    def search(self, query: str, limit: int = 5, min_score: float = 0.0) -> list[MetaToolSearchResult]:
        """Search for relevant tools using hybrid BM25 + TF-IDF.

        Args:
            query: Natural language query.
            limit: Maximum number of results. A value of zero or less yields
                an empty list.
            min_score: Minimum fused relevance score (0-1); lower-scoring
                tools are dropped.

        Returns:
            List of search results sorted by fused score, highest first.
            Empty when the index holds no tools.
        """
        # Nothing can be returned for a non-positive limit or an empty index,
        # so skip the retrievers entirely (this also avoids asking for k=0).
        if limit <= 0 or not self.tool_names:
            return []

        # Fetch a larger candidate pool than requested so fusion has more to
        # work with, but never more documents than the index holds.
        candidate_count = min(max(50, limit), len(self.tool_names))

        # Search with BM25
        query_tokens = bm25s.tokenize([query], stemmer=None, show_progress=False)  # ty: ignore[invalid-argument-type]
        bm25_results, bm25_scores = self.bm25_retriever.retrieve(query_tokens, k=candidate_count)

        # Search with TF-IDF
        tfidf_results = self.tfidf_index.search(query, k=candidate_count)

        # Per-tool score components, keyed by tool name.
        score_map: dict[str, dict[str, float]] = {}

        for idx, score in zip(bm25_results[0], bm25_scores[0]):
            # Squash the unbounded BM25 score with a logistic curve, then
            # clamp to [0, 1] to guard against floating-point drift.
            normalized_score = float(1 / (1 + np.exp(-score / 10)))
            score_map[self.tool_names[idx]] = {"bm25": max(0.0, min(1.0, normalized_score))}

        for result in tfidf_results:
            score_map.setdefault(result.id, {})["tfidf"] = result.score

        # Fuse scores: hybrid_score = alpha * bm25 + (1 - alpha) * tfidf.
        # A component missing for a tool counts as 0.0.
        alpha = self.hybrid_alpha
        fused_results: list[tuple[str, float]] = [
            (tool_name, alpha * scores.get("bm25", 0.0) + (1 - alpha) * scores.get("tfidf", 0.0))
            for tool_name, scores in score_map.items()
        ]

        # Sort by score descending
        fused_results.sort(key=lambda x: x[1], reverse=True)

        # Build final results
        search_results: list[MetaToolSearchResult] = []
        for tool_name, score in fused_results:
            if score < min_score:
                continue

            tool = self.tool_map.get(tool_name)
            if tool is None:
                # Defensive: an id reported by an index that is not in the map.
                continue

            search_results.append(
                MetaToolSearchResult(
                    name=tool.name,
                    description=tool.description,
                    score=score,
                )
            )

            if len(search_results) >= limit:
                break

        return search_results

172

173

def create_meta_search_tools(index: ToolIndex) -> StackOneTool:
    """Create the meta_search_tools tool.

    The returned tool performs no HTTP request; its ``execute`` runs a local
    search against ``index`` and returns the matches.

    Args:
        index: Tool search index

    Returns:
        Meta tool for searching relevant tools. Executing it returns
        ``{"tools": [{"name": ..., "description": ..., "score": ...}, ...]}``.
    """
    name = "meta_search_tools"
    description = (
        "Searches for relevant tools based on a natural language query using hybrid BM25 + TF-IDF search "
        f"(alpha={index.hybrid_alpha}). This tool should be called first to discover available tools "
        "before executing them."
    )

    parameters = ToolParameters(
        type="object",
        properties={
            "query": {
                "type": "string",
                "description": (
                    "Natural language query describing what tools you need "
                    '(e.g., "tools for managing employees", "create time off request")'
                ),
            },
            "limit": {
                "type": "number",
                "description": "Maximum number of tools to return (default: 5)",
                "default": 5,
            },
            "minScore": {
                "type": "number",
                "description": "Minimum relevance score (0-1) to filter results (default: 0.0)",
                "default": 0.0,
            },
        },
    )

    def execute_filter(arguments: str | JsonDict | None = None) -> JsonDict:
        """Run the search described by ``arguments`` and format the results.

        Args:
            arguments: A JSON string, an already-parsed dict, or None.

        Returns:
            Dict with a single ``"tools"`` key holding the matches.

        Raises:
            json.JSONDecodeError: If ``arguments`` is a string that is not
                valid JSON.
        """
        # Parse arguments; a missing payload or JSON "null" means "no arguments".
        parsed = json.loads(arguments) if isinstance(arguments, str) else arguments
        kwargs = parsed or {}

        # Callers may send explicit nulls for optional fields; treat them the
        # same as an omitted field instead of failing in int()/float().
        query = kwargs.get("query") or ""
        raw_limit = kwargs.get("limit")
        limit = 5 if raw_limit is None else int(raw_limit)
        raw_min_score = kwargs.get("minScore")
        min_score = 0.0 if raw_min_score is None else float(raw_min_score)

        # Search for tools
        results = index.search(query, limit, min_score)

        # Format results
        tools_data = [
            {
                "name": r.name,
                "description": r.description,
                "score": r.score,
            }
            for r in results
        ]

        return {"tools": tools_data}

    # Create execute config for the meta tool
    execute_config = ExecuteConfig(
        name=name,
        method="POST",
        url="",  # Meta tools don't make HTTP requests
        headers={},
    )

    # Create a wrapper class that delegates execute to our custom function
    class MetaSearchTool(StackOneTool):
        """Meta tool for searching relevant tools"""

        def __init__(self) -> None:
            super().__init__(
                description=description,
                parameters=parameters,
                _execute_config=execute_config,
                _api_key="",  # Meta tools don't need API key
                _account_id=None,
            )

        def execute(
            self, arguments: str | JsonDict | None = None, *, options: JsonDict | None = None
        ) -> JsonDict:
            # ``options`` is accepted for signature compatibility and is unused.
            return execute_filter(arguments)

    return MetaSearchTool()

267

268

def create_meta_execute_tool(tools_collection: Tools) -> StackOneTool:
    """Build the meta_execute_tool, which runs another tool by name.

    Args:
        tools_collection: Collection the requested tool is looked up in.

    Returns:
        Meta tool whose ``execute`` forwards ``params`` to the tool named by
        ``toolName`` and returns that tool's result.
    """
    name = "meta_execute_tool"

    # No HTTP request is ever issued for a meta tool, hence the empty URL.
    execute_config = ExecuteConfig(
        name=name,
        method="POST",
        url="",
        headers={},
    )

    description = (
        "Executes a tool by name with the provided parameters. "
        "Use this after discovering tools with meta_search_tools."
    )

    tool_name_schema = {
        "type": "string",
        "description": "Name of the tool to execute",
    }
    params_schema = {
        "type": "object",
        "description": "Parameters to pass to the tool",
        "additionalProperties": True,
    }
    parameters = ToolParameters(
        type="object",
        properties={"toolName": tool_name_schema, "params": params_schema},
    )

    def _dispatch(raw_arguments: str | JsonDict | None = None) -> JsonDict:
        """Resolve the requested tool and run it.

        Raises:
            ValueError: If ``toolName`` is missing/empty or names no tool in
                the collection.
        """
        # Accept either a JSON string or an already-parsed mapping.
        if isinstance(raw_arguments, str):
            payload = json.loads(raw_arguments)
        else:
            payload = raw_arguments or {}

        requested_name = payload.get("toolName")
        if not requested_name:
            raise ValueError("toolName is required")

        target = tools_collection.get_tool(requested_name)
        if not target:
            raise ValueError(f"Tool '{requested_name}' not found")

        # Hand the nested params to the resolved tool.
        return target.execute(payload.get("params", {}))

    # Subclass so the meta tool looks like any other StackOneTool while its
    # execute() is routed to the local dispatcher above.
    class MetaExecuteTool(StackOneTool):
        """Meta tool for executing discovered tools"""

        def __init__(self) -> None:
            super().__init__(
                description=description,
                parameters=parameters,
                _execute_config=execute_config,
                _api_key="",  # no API key is needed for a meta tool
                _account_id=None,
            )

        def execute(
            self, arguments: str | JsonDict | None = None, *, options: JsonDict | None = None
        ) -> JsonDict:
            # ``options`` is part of the signature but is not used here.
            return _dispatch(arguments)

    return MetaExecuteTool()

Coverage for stackone_ai/meta_tools.py: 94%

114 statements