Coverage for stackone_ai / semantic_search.py: 93%

51 statements  

« prev     ^ index     » next       coverage.py v7.13.1, created at 2026-04-02 08:51 +0000

1"""Semantic search client for StackOne action search API. 

2 

3How Semantic Search Works 

4========================= 

5 

6The SDK provides three ways to discover tools using semantic search. 

7Each path trades off between speed, filtering, and completeness. 

8 

91. ``search_tools(query)`` — Full tool discovery (recommended for agent frameworks) 

10~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

11 

12This is the primary method used when integrating with OpenAI, LangChain, or CrewAI. 

13The internal flow is: 

14 

151. Fetch tools from linked accounts via MCP (provides connectors and tool schemas) 

162. Search EACH connector in parallel via the semantic search API (/actions/search) 

173. Match search results to MCP tool definitions 

184. Deduplicate, sort by relevance score, apply top_k 

195. Return Tools sorted by relevance score 

20 

21Key point: only the user's own connectors are searched — no wasted results 

22from connectors the user doesn't have. Tool schemas come from MCP (source 

23of truth), while the search API provides relevance ranking. 

24 

25If the semantic API is unavailable, the SDK falls back to a local 

26BM25 + TF-IDF hybrid search over the fetched tools (unless 

27``search="semantic"`` is specified). 

28 

29 

302. ``search_action_names(query)`` — Lightweight discovery 

31~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

32 

33Queries the semantic API directly and returns action IDs with 

34similarity scores, **without** building full tool objects. Useful 

35for previewing results before committing to a full fetch. 

36 

37When ``account_ids`` are provided, each connector is searched in 

38parallel (same as ``search_tools``). Without ``account_ids``, results 

39come from the full StackOne catalog. 

40 

41 

423. ``toolset.get_search_tool()`` — Agent-loop callable 

43~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 

44 

45Returns a callable ``SearchTool`` that wraps ``search_tools()``. 

46Call it with a natural language query to get a ``Tools`` collection 

47back. Designed for agent loops where the LLM decides what to search for. 

48""" 

49 

50from __future__ import annotations 

51 

52import base64 

53from typing import Any 

54 

55import httpx 

56from pydantic import BaseModel 

57 

58from stackone_ai.constants import DEFAULT_BASE_URL 

59 

60 

class SemanticSearchError(Exception):
    """Signals that a semantic search request could not be completed."""

65 

66 

class SemanticSearchResult(BaseModel):
    """One hit returned by the semantic search API."""

    # Identifier of the matched action.
    id: str
    # Similarity between the query and this action, as reported by the API.
    similarity_score: float

72 

73 

class SemanticSearchResponse(BaseModel):
    """Parsed body of a reply from the ``/actions/search`` endpoint."""

    # Matching actions, each with its similarity score.
    results: list[SemanticSearchResult]
    # Count reported by the API alongside the results.
    total_count: int
    # Query string reported back by the API.
    query: str
    # Filters reported back by the API; both are optional and default to None.
    connector_filter: str | None = None
    project_filter: str | None = None

82 

83 

class SemanticSearchClient:
    """Client for StackOne semantic search API.

    This client provides access to the semantic search endpoint which uses
    enhanced embeddings for higher accuracy than local BM25+TF-IDF search.

    Example:
        client = SemanticSearchClient(api_key="sk-xxx")
        response = client.search("create employee", connector="bamboohr", top_k=5)
        for result in response.results:
            print(f"{result.id}: {result.similarity_score:.2f}")
    """

    def __init__(
        self,
        api_key: str,
        base_url: str = DEFAULT_BASE_URL,
        timeout: float = 30.0,
    ) -> None:
        """Initialize the semantic search client.

        Args:
            api_key: StackOne API key
            base_url: Base URL for API requests (trailing slashes are removed)
            timeout: Request timeout in seconds
        """
        self.api_key = api_key
        # Strip trailing slashes so endpoint paths can be appended with a single "/".
        self.base_url = base_url.rstrip("/")
        self.timeout = timeout

    def _build_auth_header(self) -> str:
        """Build the Basic auth header.

        The API key is used as the username with an empty password, i.e. the
        base64 encoding of ``"<api_key>:"``.
        """
        token = base64.b64encode(f"{self.api_key}:".encode()).decode()
        return f"Basic {token}"

    def search(
        self,
        query: str,
        connector: str | None = None,
        top_k: int | None = None,
        project_id: str | None = None,
        min_similarity: float | None = None,
    ) -> SemanticSearchResponse:
        """Search for relevant actions using semantic search.

        Args:
            query: Natural language query describing what tools/actions you need
            connector: Optional connector/provider filter (e.g., "bamboohr", "slack")
            top_k: Maximum number of results to return. If not provided, uses the backend default.
            project_id: Optional project scope (e.g., "103/dev-56501"). When provided,
                results include both global actions and project-specific actions.
            min_similarity: Minimum similarity score threshold (0-1). If not provided,
                the server uses its default (currently 0.4).

        Returns:
            SemanticSearchResponse containing matching actions with similarity scores

        Raises:
            SemanticSearchError: If the API call fails

        Example:
            response = client.search("onboard a new team member", top_k=5)
            for result in response.results:
                print(f"{result.id}: {result.similarity_score:.2f}")
        """
        url = f"{self.base_url}/actions/search"
        headers = {
            "Authorization": self._build_auth_header(),
            "Content-Type": "application/json",
        }

        # Only send optional fields the caller actually set, so the backend
        # applies its own defaults for the rest. Numeric options are checked
        # against None (0 is a meaningful value); string filters are skipped
        # when empty.
        payload: dict[str, Any] = {"query": query}
        if top_k is not None:
            payload["top_k"] = top_k
        if connector:
            payload["connector"] = connector
        if project_id:
            payload["project_id"] = project_id
        if min_similarity is not None:
            payload["min_similarity"] = min_similarity

        try:
            response = httpx.post(url, json=payload, headers=headers, timeout=self.timeout)
            response.raise_for_status()
            data = response.json()
            return SemanticSearchResponse(**data)
        except httpx.HTTPStatusError as e:
            raise SemanticSearchError(f"API error: {e.response.status_code} - {e.response.text}") from e
        except httpx.RequestError as e:
            raise SemanticSearchError(f"Request failed: {e}") from e
        except Exception as e:
            # Deliberate catch-all at the client boundary: body decoding and
            # response-model construction failures are surfaced as
            # SemanticSearchError too, so callers handle one exception type.
            raise SemanticSearchError(f"Search failed: {e}") from e

    def search_action_names(
        self,
        query: str,
        connector: str | None = None,
        top_k: int | None = None,
        min_similarity: float | None = None,
        project_id: str | None = None,
    ) -> list[str]:
        """Convenience method returning just action names.

        Args:
            query: Natural language query
            connector: Optional connector/provider filter
            top_k: Maximum number of results. If not provided, uses the backend default.
            min_similarity: Minimum similarity score threshold (0-1). If not provided,
                the server uses its default.
            project_id: Optional project scope for multi-tenant filtering

        Returns:
            List of action names (the ``id`` of each result), in the order
            the API returned them

        Raises:
            SemanticSearchError: If the underlying ``search`` call fails

        Example:
            action_names = client.search_action_names(
                "create employee",
                connector="bamboohr",
                min_similarity=0.5
            )
        """
        # Forward by keyword: this method's parameter order differs from
        # search() (min_similarity / project_id are swapped), so positional
        # forwarding would be easy to get wrong.
        response = self.search(
            query,
            connector=connector,
            top_k=top_k,
            project_id=project_id,
            min_similarity=min_similarity,
        )
        return [r.id for r in response.results]