Mirror of https://github.com/crewAIInc/crewAI.git, synced 2026-01-08 15:48:29 +00:00
Merge pull request #147 from crewAIInc/feature/enhanced-serper-search
feat(serper-dev): implement enhanced search capabilities and error ha…
@@ -1,30 +1,49 @@
# SerperDevTool Documentation

## Description

The SerperDevTool is a powerful search tool that interfaces with the `serper.dev` API to perform internet searches. It supports multiple search types, including general search and news search, with features such as knowledge graph integration, organic results, "People Also Ask" questions, and related searches.

## Features

- Multiple search types: 'search' (default) and 'news'
- Knowledge graph integration for enhanced search context
- Organic search results with sitelinks
- "People Also Ask" questions and answers
- Related search suggestions
- News search with date, source, and image information
- Configurable number of results
- Optional result saving to file

## Installation

To incorporate this tool into your project, follow the installation instructions below:

```shell
pip install 'crewai[tools]'
```

## Usage

The following example demonstrates how to initialize the tool and execute a search with a given query:

```python
from crewai_tools import SerperDevTool

# Initialize the tool
tool = SerperDevTool(
    n_results=10,         # Optional: Number of results to return (default: 10)
    save_file=False,      # Optional: Save results to file (default: False)
    search_type="search"  # Optional: Type of search - "search" or "news" (default: "search")
)

# Execute a search
results = tool._run(search_query="your search query")
```
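
News search uses the same interface; a minimal sketch, with an illustrative query string:

```python
from crewai_tools import SerperDevTool

# Initialize the tool for news results; n_results caps how many items are returned.
news_tool = SerperDevTool(search_type="news", n_results=5)

# News items include date, source, and image information.
news_results = news_tool._run(search_query="latest AI developments")
```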

## Configuration

1. **API Key Setup**:
   - Sign up for an account at `serper.dev`
   - Obtain your API key
   - Set the environment variable `SERPER_API_KEY`, as shown in the sketch below
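
A minimal sketch of the key setup from Python; the key value is a placeholder:

```python
import os

# The tool reads the key from the SERPER_API_KEY environment variable at request time.
os.environ["SERPER_API_KEY"] = "your-serper-api-key"  # placeholder value
```

In most deployments the variable is set in the shell or a `.env` file rather than in code.
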
## Response Format

The tool returns structured data including:

- Search parameters
- Knowledge graph data (for general search)
- Organic search results
- "People Also Ask" questions
- Related searches
- News results (for news search type)
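
For illustration, a short sketch of reading these fields from the returned dictionary; the keys mirror the tool's processing methods, and the query is a placeholder:

```python
results = tool._run(search_query="crewAI framework")

# Each top-level key is present only when the API returned that section.
for item in results.get("organic", []):
    print(item["title"], item["link"])

for question in results.get("peopleAlsoAsk", []):
    print(question["question"])

print(results.get("credits"))  # API credits consumed by the request
```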
@@ -1,19 +1,29 @@
```python
import datetime
import json
import logging
import os
from typing import Any, Type

import requests
from crewai.tools import BaseTool
from pydantic import BaseModel, Field


logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s"
)
logger = logging.getLogger(__name__)


def _save_results_to_file(content: str) -> None:
    """Saves the search results to a file."""
    try:
        filename = f"search_results_{datetime.datetime.now().strftime('%Y-%m-%d_%H-%M-%S')}.txt"
        with open(filename, "w") as file:
            file.write(content)
        logger.info(f"Results saved to {filename}")
    except IOError as e:
        logger.error(f"Failed to save results to file: {e}")
        raise


class SerperDevToolSchema(BaseModel):
```
@@ -27,67 +37,199 @@ class SerperDevToolSchema(BaseModel):
```python
class SerperDevTool(BaseTool):
    name: str = "Search the internet"
    description: str = (
        "A tool that can be used to search the internet with a search_query. "
        "Supports different search types: 'search' (default), 'news'"
    )
    args_schema: Type[BaseModel] = SerperDevToolSchema
    base_url: str = "https://google.serper.dev"
    n_results: int = 10
    save_file: bool = False
    search_type: str = "search"

    def _get_search_url(self, search_type: str) -> str:
        """Get the appropriate endpoint URL based on search type."""
        search_type = search_type.lower()
        allowed_search_types = ["search", "news"]
        if search_type not in allowed_search_types:
            raise ValueError(
                f"Invalid search type: {search_type}. Must be one of: {', '.join(allowed_search_types)}"
            )
        return f"{self.base_url}/{search_type}"

    def _process_knowledge_graph(self, kg: dict) -> dict:
        """Process knowledge graph data from search results."""
        return {
            "title": kg.get("title", ""),
            "type": kg.get("type", ""),
            "website": kg.get("website", ""),
            "imageUrl": kg.get("imageUrl", ""),
            "description": kg.get("description", ""),
            "descriptionSource": kg.get("descriptionSource", ""),
            "descriptionLink": kg.get("descriptionLink", ""),
            "attributes": kg.get("attributes", {}),
        }

    def _process_organic_results(self, organic_results: list) -> list:
        """Process organic search results."""
        processed_results = []
        for result in organic_results[: self.n_results]:
            try:
                result_data = {
                    "title": result["title"],
                    "link": result["link"],
                    "snippet": result.get("snippet", ""),
                    "position": result.get("position"),
                }

                if "sitelinks" in result:
                    result_data["sitelinks"] = [
                        {
                            "title": sitelink.get("title", ""),
                            "link": sitelink.get("link", ""),
                        }
                        for sitelink in result["sitelinks"]
                    ]

                processed_results.append(result_data)
            except KeyError:
                logger.warning(f"Skipping malformed organic result: {result}")
                continue
        return processed_results

    def _process_people_also_ask(self, paa_results: list) -> list:
        """Process 'People Also Ask' results."""
        processed_results = []
        for result in paa_results[: self.n_results]:
            try:
                result_data = {
                    "question": result["question"],
                    "snippet": result.get("snippet", ""),
                    "title": result.get("title", ""),
                    "link": result.get("link", ""),
                }
                processed_results.append(result_data)
            except KeyError:
                logger.warning(f"Skipping malformed PAA result: {result}")
                continue
        return processed_results

    def _process_related_searches(self, related_results: list) -> list:
        """Process related search results."""
        processed_results = []
        for result in related_results[: self.n_results]:
            try:
                processed_results.append({"query": result["query"]})
            except KeyError:
                logger.warning(f"Skipping malformed related search result: {result}")
                continue
        return processed_results

    def _process_news_results(self, news_results: list) -> list:
        """Process news search results."""
        processed_results = []
        for result in news_results[: self.n_results]:
            try:
                result_data = {
                    "title": result["title"],
                    "link": result["link"],
                    "snippet": result.get("snippet", ""),
                    "date": result.get("date", ""),
                    "source": result.get("source", ""),
                    "imageUrl": result.get("imageUrl", ""),
                }
                processed_results.append(result_data)
            except KeyError:
                logger.warning(f"Skipping malformed news result: {result}")
                continue
        return processed_results

    def _make_api_request(self, search_query: str, search_type: str) -> dict:
        """Make API request to Serper."""
        search_url = self._get_search_url(search_type)
        payload = json.dumps({"q": search_query, "num": self.n_results})
        headers = {
            "X-API-KEY": os.environ["SERPER_API_KEY"],
            "content-type": "application/json",
        }

        response = None
        try:
            response = requests.post(
                search_url, headers=headers, json=json.loads(payload), timeout=10
            )
            response.raise_for_status()
            results = response.json()
            if not results:
                logger.error("Empty response from Serper API")
                raise ValueError("Empty response from Serper API")
            return results
        except requests.exceptions.RequestException as e:
            error_msg = f"Error making request to Serper API: {e}"
            if response is not None and hasattr(response, "content"):
                error_msg += f"\nResponse content: {response.content}"
            logger.error(error_msg)
            raise
        except json.JSONDecodeError as e:
            if response is not None and hasattr(response, "content"):
                logger.error(f"Error decoding JSON response: {e}")
                logger.error(f"Response content: {response.content}")
            else:
                logger.error(
                    f"Error decoding JSON response: {e} (No response content available)"
                )
            raise

    def _process_search_results(self, results: dict, search_type: str) -> dict:
        """Process search results based on search type."""
        formatted_results = {}

        if search_type == "search":
            if "knowledgeGraph" in results:
                formatted_results["knowledgeGraph"] = self._process_knowledge_graph(
                    results["knowledgeGraph"]
                )

            if "organic" in results:
                formatted_results["organic"] = self._process_organic_results(
                    results["organic"]
                )

            if "peopleAlsoAsk" in results:
                formatted_results["peopleAlsoAsk"] = self._process_people_also_ask(
                    results["peopleAlsoAsk"]
                )

            if "relatedSearches" in results:
                formatted_results["relatedSearches"] = self._process_related_searches(
                    results["relatedSearches"]
                )

        elif search_type == "news":
            if "news" in results:
                formatted_results["news"] = self._process_news_results(results["news"])

        return formatted_results

    def _run(self, **kwargs: Any) -> Any:
        """Execute the search operation."""
        search_query = kwargs.get("search_query") or kwargs.get("query")
        search_type = kwargs.get("search_type", self.search_type)
        save_file = kwargs.get("save_file", self.save_file)

        results = self._make_api_request(search_query, search_type)

        formatted_results = {
            "searchParameters": {
                "q": search_query,
                "type": search_type,
                **results.get("searchParameters", {}),
            }
        }

        formatted_results.update(self._process_search_results(results, search_type))
        formatted_results["credits"] = results.get("credits", 1)

        if save_file:
            _save_results_to_file(json.dumps(formatted_results, indent=2))

        return formatted_results
```
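
As a quick end-to-end check of the refactored flow, a minimal sketch; it assumes `SERPER_API_KEY` is set and the class above is importable:

```python
import json

tool = SerperDevTool(n_results=3, search_type="news")

try:
    results = tool._run(search_query="open source LLM agents")
    print(json.dumps(results["searchParameters"], indent=2))
except ValueError as e:
    # Raised for an invalid search_type or an empty API response.
    print(f"Search failed: {e}")
```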