diff --git a/packages/tools/pyproject.toml b/packages/tools/pyproject.toml index 039089d7d..74556fbf4 100644 --- a/packages/tools/pyproject.toml +++ b/packages/tools/pyproject.toml @@ -9,12 +9,18 @@ authors = [ requires-python = ">=3.10,<3.14" dependencies = [ "crewai-core", + "click>=8.1.8", "lancedb>=0.5.4", "pytube>=15.0.0", "requests>=2.31.0", "docker>=7.1.0", "tiktoken>=0.8.0", "stagehand>=0.4.1", + "portalocker==2.7.0", + "beautifulsoup4>=4.13.4", + "pypdf>=5.9.0", + "python-docx>=1.2.0", + "youtube-transcript-api>=1.2.2", ] [project.urls] @@ -24,9 +30,6 @@ Documentation = "https://docs.crewai.com" [project.optional-dependencies] -embedchain = [ - "embedchain>=0.1.114", -] scrapfly-sdk = [ "scrapfly-sdk>=0.8.19", ] @@ -124,6 +127,12 @@ oxylabs = [ mongodb = [ "pymongo>=4.13" ] +mysql = [ + "pymysql>=1.1.1" +] +postgresql = [ + "psycopg2-binary>=2.9.10" +] bedrock = [ "beautifulsoup4>=4.13.4", "bedrock-agentcore>=0.1.0", @@ -135,6 +144,9 @@ contextual = [ "nest-asyncio>=1.6.0", ] +[tool.hatch.metadata] +allow-direct-references = true + [tool.pytest.ini_options] testpaths = ["tests"] @@ -149,3 +161,12 @@ build-backend = "hatchling.build" [tool.hatch.build.targets.wheel] packages = ["src/crewai_tools"] + +[dependency-groups] +dev = [ + "pytest-asyncio>=0.25.2", + "pytest>=8.0.0", + "pytest-recording>=0.13.3", + "mypy>=1.18.1", + "ruff>=0.13.0", +] diff --git a/packages/tools/src/crewai_tools/__init__.py b/packages/tools/src/crewai_tools/__init__.py index d705a375b..b5635520b 100644 --- a/packages/tools/src/crewai_tools/__init__.py +++ b/packages/tools/src/crewai_tools/__init__.py @@ -59,6 +59,7 @@ from .tools import ( OxylabsAmazonSearchScraperTool, OxylabsGoogleSearchScraperTool, OxylabsUniversalScraperTool, + ParallelSearchTool, PatronusEvalTool, PatronusLocalEvaluatorTool, PatronusPredefinedCriteriaEvalTool, @@ -96,5 +97,4 @@ from .tools import ( YoutubeChannelSearchTool, YoutubeVideoSearchTool, ZapierActionTools, - ParallelSearchTool, ) diff --git a/packages/tools/src/crewai_tools/adapters/crewai_rag_adapter.py b/packages/tools/src/crewai_tools/adapters/crewai_rag_adapter.py new file mode 100644 index 000000000..5951f9fde --- /dev/null +++ b/packages/tools/src/crewai_tools/adapters/crewai_rag_adapter.py @@ -0,0 +1,267 @@ +"""Adapter for CrewAI's native RAG system.""" + +import hashlib +from pathlib import Path +from typing import Any, TypeAlias, TypedDict + +from crewai.rag.config.types import RagConfigType +from crewai.rag.config.utils import get_rag_client +from crewai.rag.core.base_client import BaseClient +from crewai.rag.factory import create_client +from crewai.rag.types import BaseRecord, SearchResult +from crewai_tools.rag.data_types import DataType +from crewai_tools.rag.misc import sanitize_metadata_for_chromadb +from crewai_tools.tools.rag.rag_tool import Adapter +from pydantic import PrivateAttr +from typing_extensions import Unpack + +ContentItem: TypeAlias = str | Path | dict[str, Any] + + +class AddDocumentParams(TypedDict, total=False): + """Parameters for adding documents to the RAG system.""" + + data_type: DataType + metadata: dict[str, Any] + website: str + url: str + file_path: str | Path + github_url: str + youtube_url: str + directory_path: str | Path + + +class CrewAIRagAdapter(Adapter): + """Adapter that uses CrewAI's native RAG system. + + Supports custom vector database configuration through the config parameter. 
+ """ + + collection_name: str = "default" + summarize: bool = False + similarity_threshold: float = 0.6 + limit: int = 5 + config: RagConfigType | None = None + _client: BaseClient | None = PrivateAttr(default=None) + + def model_post_init(self, __context: Any) -> None: + """Initialize the CrewAI RAG client after model initialization.""" + if self.config is not None: + self._client = create_client(self.config) + else: + self._client = get_rag_client() + self._client.get_or_create_collection(collection_name=self.collection_name) + + def query( + self, + question: str, + similarity_threshold: float | None = None, + limit: int | None = None, + ) -> str: + """Query the knowledge base with a question. + + Args: + question: The question to ask + similarity_threshold: Minimum similarity score for results (default: 0.6) + limit: Maximum number of results to return (default: 5) + + Returns: + Relevant content from the knowledge base + """ + search_limit = limit if limit is not None else self.limit + search_threshold = ( + similarity_threshold + if similarity_threshold is not None + else self.similarity_threshold + ) + + results: list[SearchResult] = self._client.search( + collection_name=self.collection_name, + query=question, + limit=search_limit, + score_threshold=search_threshold, + ) + + if not results: + return "No relevant content found." + + contents: list[str] = [] + for result in results: + content: str = result.get("content", "") + if content: + contents.append(content) + + return "\n\n".join(contents) + + def add(self, *args: ContentItem, **kwargs: Unpack[AddDocumentParams]) -> None: + """Add content to the knowledge base. + + This method handles various input types and converts them to documents + for the vector database. It supports the data_type parameter for + compatibility with existing tools. + + Args: + *args: Content items to add (strings, paths, or document dicts) + **kwargs: Additional parameters including data_type, metadata, etc. 
+ """ + import os + + from crewai_tools.rag.base_loader import LoaderResult + from crewai_tools.rag.data_types import DataType, DataTypes + from crewai_tools.rag.source_content import SourceContent + + documents: list[BaseRecord] = [] + data_type: DataType | None = kwargs.get("data_type") + base_metadata: dict[str, Any] = kwargs.get("metadata", {}) + + for arg in args: + source_ref: str + if isinstance(arg, dict): + source_ref = str(arg.get("source", arg.get("content", ""))) + else: + source_ref = str(arg) + + if not data_type: + data_type = DataTypes.from_content(source_ref) + + if data_type == DataType.DIRECTORY: + if not os.path.isdir(source_ref): + raise ValueError(f"Directory does not exist: {source_ref}") + + # Define binary and non-text file extensions to skip + binary_extensions = { + ".pyc", + ".pyo", + ".png", + ".jpg", + ".jpeg", + ".gif", + ".bmp", + ".ico", + ".svg", + ".webp", + ".pdf", + ".zip", + ".tar", + ".gz", + ".bz2", + ".7z", + ".rar", + ".exe", + ".dll", + ".so", + ".dylib", + ".bin", + ".dat", + ".db", + ".sqlite", + ".class", + ".jar", + ".war", + ".ear", + } + + for root, dirs, files in os.walk(source_ref): + dirs[:] = [d for d in dirs if not d.startswith(".")] + + for filename in files: + if filename.startswith("."): + continue + + # Skip binary files based on extension + file_ext = os.path.splitext(filename)[1].lower() + if file_ext in binary_extensions: + continue + + # Skip __pycache__ directories + if "__pycache__" in root: + continue + + file_path: str = os.path.join(root, filename) + try: + file_data_type: DataType = DataTypes.from_content(file_path) + file_loader = file_data_type.get_loader() + file_chunker = file_data_type.get_chunker() + + file_source = SourceContent(file_path) + file_result: LoaderResult = file_loader.load(file_source) + + file_chunks = file_chunker.chunk(file_result.content) + + for chunk_idx, file_chunk in enumerate(file_chunks): + file_metadata: dict[str, Any] = base_metadata.copy() + file_metadata.update(file_result.metadata) + file_metadata["data_type"] = str(file_data_type) + file_metadata["file_path"] = file_path + file_metadata["chunk_index"] = chunk_idx + file_metadata["total_chunks"] = len(file_chunks) + + if isinstance(arg, dict): + file_metadata.update(arg.get("metadata", {})) + + chunk_id = hashlib.sha256( + f"{file_result.doc_id}_{chunk_idx}_{file_chunk}".encode() + ).hexdigest() + + documents.append( + { + "doc_id": chunk_id, + "content": file_chunk, + "metadata": sanitize_metadata_for_chromadb( + file_metadata + ), + } + ) + except Exception: + # Silently skip files that can't be processed + continue + else: + metadata: dict[str, Any] = base_metadata.copy() + + if data_type in [ + DataType.PDF_FILE, + DataType.TEXT_FILE, + DataType.DOCX, + DataType.CSV, + DataType.JSON, + DataType.XML, + DataType.MDX, + ]: + if not os.path.isfile(source_ref): + raise FileNotFoundError(f"File does not exist: {source_ref}") + + loader = data_type.get_loader() + chunker = data_type.get_chunker() + + source_content = SourceContent(source_ref) + loader_result: LoaderResult = loader.load(source_content) + + chunks = chunker.chunk(loader_result.content) + + for i, chunk in enumerate(chunks): + chunk_metadata: dict[str, Any] = metadata.copy() + chunk_metadata.update(loader_result.metadata) + chunk_metadata["data_type"] = str(data_type) + chunk_metadata["chunk_index"] = i + chunk_metadata["total_chunks"] = len(chunks) + chunk_metadata["source"] = source_ref + + if isinstance(arg, dict): + chunk_metadata.update(arg.get("metadata", {})) + + 
chunk_id = hashlib.sha256( + f"{loader_result.doc_id}_{i}_{chunk}".encode() + ).hexdigest() + + documents.append( + { + "doc_id": chunk_id, + "content": chunk, + "metadata": sanitize_metadata_for_chromadb(chunk_metadata), + } + ) + + if documents: + self._client.add_documents( + collection_name=self.collection_name, documents=documents + ) diff --git a/packages/tools/src/crewai_tools/adapters/embedchain_adapter.py b/packages/tools/src/crewai_tools/adapters/embedchain_adapter.py deleted file mode 100644 index 1e7b83c0b..000000000 --- a/packages/tools/src/crewai_tools/adapters/embedchain_adapter.py +++ /dev/null @@ -1,34 +0,0 @@ -from typing import Any - -from crewai_tools.tools.rag.rag_tool import Adapter - -try: - from embedchain import App - EMBEDCHAIN_AVAILABLE = True -except ImportError: - EMBEDCHAIN_AVAILABLE = False - - -class EmbedchainAdapter(Adapter): - embedchain_app: Any # Will be App when embedchain is available - summarize: bool = False - - def __init__(self, **data): - if not EMBEDCHAIN_AVAILABLE: - raise ImportError("embedchain is not installed. Please install it with `pip install crewai-tools[embedchain]`") - super().__init__(**data) - - def query(self, question: str) -> str: - result, sources = self.embedchain_app.query( - question, citations=True, dry_run=(not self.summarize) - ) - if self.summarize: - return result - return "\n\n".join([source[0] for source in sources]) - - def add( - self, - *args: Any, - **kwargs: Any, - ) -> None: - self.embedchain_app.add(*args, **kwargs) diff --git a/packages/tools/src/crewai_tools/adapters/enterprise_adapter.py b/packages/tools/src/crewai_tools/adapters/enterprise_adapter.py index c4bfa35eb..d534ddb25 100644 --- a/packages/tools/src/crewai_tools/adapters/enterprise_adapter.py +++ b/packages/tools/src/crewai_tools/adapters/enterprise_adapter.py @@ -1,11 +1,12 @@ -import os import json -import requests -import warnings -from typing import List, Any, Dict, Literal, Optional, Union, get_origin, Type, cast -from pydantic import Field, create_model -from crewai.tools import BaseTool +import os import re +import warnings +from typing import Any, Literal, Optional, Union, cast, get_origin + +import requests +from crewai.tools import BaseTool +from pydantic import Field, create_model def get_enterprise_api_base_url() -> str: @@ -13,6 +14,7 @@ def get_enterprise_api_base_url() -> str: base_url = os.getenv("CREWAI_PLUS_URL", "https://app.crewai.com") return f"{base_url}/crewai_plus/api/v1/integrations" + ENTERPRISE_API_BASE_URL = get_enterprise_api_base_url() @@ -23,7 +25,7 @@ class EnterpriseActionTool(BaseTool): default="", description="The enterprise action token" ) action_name: str = Field(default="", description="The name of the action") - action_schema: Dict[str, Any] = Field( + action_schema: dict[str, Any] = Field( default={}, description="The schema of the action" ) enterprise_api_base_url: str = Field( @@ -36,8 +38,8 @@ class EnterpriseActionTool(BaseTool): description: str, enterprise_action_token: str, action_name: str, - action_schema: Dict[str, Any], - enterprise_api_base_url: Optional[str] = None, + action_schema: dict[str, Any], + enterprise_api_base_url: str | None = None, ): self._model_registry = {} self._base_name = self._sanitize_name(name) @@ -86,7 +88,9 @@ class EnterpriseActionTool(BaseTool): self.enterprise_action_token = enterprise_action_token self.action_name = action_name self.action_schema = action_schema - self.enterprise_api_base_url = enterprise_api_base_url or get_enterprise_api_base_url() + 
self.enterprise_api_base_url = ( + enterprise_api_base_url or get_enterprise_api_base_url() + ) def _sanitize_name(self, name: str) -> str: """Sanitize names to create proper Python class names.""" @@ -95,8 +99,8 @@ class EnterpriseActionTool(BaseTool): return "".join(word.capitalize() for word in parts if word) def _extract_schema_info( - self, action_schema: Dict[str, Any] - ) -> tuple[Dict[str, Any], List[str]]: + self, action_schema: dict[str, Any] + ) -> tuple[dict[str, Any], list[str]]: """Extract schema properties and required fields from action schema.""" schema_props = ( action_schema.get("function", {}) @@ -108,7 +112,7 @@ class EnterpriseActionTool(BaseTool): ) return schema_props, required - def _process_schema_type(self, schema: Dict[str, Any], type_name: str) -> Type[Any]: + def _process_schema_type(self, schema: dict[str, Any], type_name: str) -> type[Any]: """Process a JSON schema and return appropriate Python type.""" if "anyOf" in schema: any_of_types = schema["anyOf"] @@ -118,7 +122,7 @@ class EnterpriseActionTool(BaseTool): if non_null_types: base_type = self._process_schema_type(non_null_types[0], type_name) return Optional[base_type] if is_nullable else base_type - return cast(Type[Any], Optional[str]) + return cast(type[Any], Optional[str]) if "oneOf" in schema: return self._process_schema_type(schema["oneOf"][0], type_name) @@ -137,14 +141,16 @@ class EnterpriseActionTool(BaseTool): if json_type == "array": items_schema = schema.get("items", {"type": "string"}) item_type = self._process_schema_type(items_schema, f"{type_name}Item") - return List[item_type] + return list[item_type] if json_type == "object": return self._create_nested_model(schema, type_name) return self._map_json_type_to_python(json_type) - def _create_nested_model(self, schema: Dict[str, Any], model_name: str) -> Type[Any]: + def _create_nested_model( + self, schema: dict[str, Any], model_name: str + ) -> type[Any]: """Create a nested Pydantic model for complex objects.""" full_model_name = f"{self._base_name}{model_name}" @@ -183,21 +189,19 @@ class EnterpriseActionTool(BaseTool): return dict def _create_field_definition( - self, field_type: Type[Any], is_required: bool, description: str + self, field_type: type[Any], is_required: bool, description: str ) -> tuple: """Create Pydantic field definition based on type and requirement.""" if is_required: return (field_type, Field(description=description)) - else: - if get_origin(field_type) is Union: - return (field_type, Field(default=None, description=description)) - else: - return ( - Optional[field_type], - Field(default=None, description=description), - ) + if get_origin(field_type) is Union: + return (field_type, Field(default=None, description=description)) + return ( + Optional[field_type], + Field(default=None, description=description), + ) - def _map_json_type_to_python(self, json_type: str) -> Type[Any]: + def _map_json_type_to_python(self, json_type: str) -> type[Any]: """Map basic JSON schema types to Python types.""" type_mapping = { "string": str, @@ -210,7 +214,7 @@ class EnterpriseActionTool(BaseTool): } return type_mapping.get(json_type, str) - def _get_required_nullable_fields(self) -> List[str]: + def _get_required_nullable_fields(self) -> list[str]: """Get a list of required nullable fields from the action schema.""" schema_props, required = self._extract_schema_info(self.action_schema) @@ -222,7 +226,7 @@ class EnterpriseActionTool(BaseTool): return required_nullable_fields - def _is_nullable_type(self, schema: Dict[str, Any]) -> 
bool: + def _is_nullable_type(self, schema: dict[str, Any]) -> bool: """Check if a schema represents a nullable type.""" if "anyOf" in schema: return any(t.get("type") == "null" for t in schema["anyOf"]) @@ -242,8 +246,9 @@ class EnterpriseActionTool(BaseTool): if field_name not in cleaned_kwargs: cleaned_kwargs[field_name] = None - - api_url = f"{self.enterprise_api_base_url}/actions/{self.action_name}/execute" + api_url = ( + f"{self.enterprise_api_base_url}/actions/{self.action_name}/execute" + ) headers = { "Authorization": f"Bearer {self.enterprise_action_token}", "Content-Type": "application/json", @@ -262,7 +267,7 @@ class EnterpriseActionTool(BaseTool): return json.dumps(data, indent=2) except Exception as e: - return f"Error executing action {self.action_name}: {str(e)}" + return f"Error executing action {self.action_name}: {e!s}" class EnterpriseActionKitToolAdapter: @@ -271,15 +276,17 @@ class EnterpriseActionKitToolAdapter: def __init__( self, enterprise_action_token: str, - enterprise_api_base_url: Optional[str] = None, + enterprise_api_base_url: str | None = None, ): """Initialize the adapter with an enterprise action token.""" self._set_enterprise_action_token(enterprise_action_token) self._actions_schema = {} self._tools = None - self.enterprise_api_base_url = enterprise_api_base_url or get_enterprise_api_base_url() + self.enterprise_api_base_url = ( + enterprise_api_base_url or get_enterprise_api_base_url() + ) - def tools(self) -> List[BaseTool]: + def tools(self) -> list[BaseTool]: """Get the list of tools created from enterprise actions.""" if self._tools is None: self._fetch_actions() @@ -289,13 +296,10 @@ class EnterpriseActionKitToolAdapter: def _fetch_actions(self): """Fetch available actions from the API.""" try: - actions_url = f"{self.enterprise_api_base_url}/actions" headers = {"Authorization": f"Bearer {self.enterprise_action_token}"} - response = requests.get( - actions_url, headers=headers, timeout=30 - ) + response = requests.get(actions_url, headers=headers, timeout=30) response.raise_for_status() raw_data = response.json() @@ -306,7 +310,7 @@ class EnterpriseActionKitToolAdapter: parsed_schema = {} action_categories = raw_data["actions"] - for integration_type, action_list in action_categories.items(): + for action_list in action_categories.values(): if isinstance(action_list, list): for action in action_list: action_name = action.get("name") @@ -314,8 +318,10 @@ class EnterpriseActionKitToolAdapter: action_schema = { "function": { "name": action_name, - "description": action.get("description", f"Execute {action_name}"), - "parameters": action.get("parameters", {}) + "description": action.get( + "description", f"Execute {action_name}" + ), + "parameters": action.get("parameters", {}), } } parsed_schema[action_name] = action_schema @@ -329,8 +335,8 @@ class EnterpriseActionKitToolAdapter: traceback.print_exc() def _generate_detailed_description( - self, schema: Dict[str, Any], indent: int = 0 - ) -> List[str]: + self, schema: dict[str, Any], indent: int = 0 + ) -> list[str]: """Generate detailed description for nested schema structures.""" descriptions = [] indent_str = " " * indent @@ -407,15 +413,17 @@ class EnterpriseActionKitToolAdapter: self._tools = tools - def _set_enterprise_action_token(self, enterprise_action_token: Optional[str]): + def _set_enterprise_action_token(self, enterprise_action_token: str | None): if enterprise_action_token and not enterprise_action_token.startswith("PK_"): warnings.warn( "Legacy token detected, please consider using 
the new Enterprise Action Auth token. Check out our docs for more information https://docs.crewai.com/en/enterprise/features/integrations.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) - token = enterprise_action_token or os.environ.get("CREWAI_ENTERPRISE_TOOLS_TOKEN") + token = enterprise_action_token or os.environ.get( + "CREWAI_ENTERPRISE_TOOLS_TOKEN" + ) self.enterprise_action_token = token diff --git a/packages/tools/src/crewai_tools/adapters/lancedb_adapter.py b/packages/tools/src/crewai_tools/adapters/lancedb_adapter.py index c91423048..9819014f0 100644 --- a/packages/tools/src/crewai_tools/adapters/lancedb_adapter.py +++ b/packages/tools/src/crewai_tools/adapters/lancedb_adapter.py @@ -1,14 +1,14 @@ +from collections.abc import Callable from pathlib import Path -from typing import Any, Callable +from typing import Any +from crewai_tools.tools.rag.rag_tool import Adapter from lancedb import DBConnection as LanceDBConnection from lancedb import connect as lancedb_connect from lancedb.table import Table as LanceDBTable from openai import Client as OpenAIClient from pydantic import Field, PrivateAttr -from crewai_tools.tools.rag.rag_tool import Adapter - def _default_embedding_function(): client = OpenAIClient() diff --git a/packages/tools/src/crewai_tools/adapters/mcp_adapter.py b/packages/tools/src/crewai_tools/adapters/mcp_adapter.py index 8e602f376..8c39bd0ff 100644 --- a/packages/tools/src/crewai_tools/adapters/mcp_adapter.py +++ b/packages/tools/src/crewai_tools/adapters/mcp_adapter.py @@ -5,6 +5,7 @@ from typing import TYPE_CHECKING, Any from crewai.tools import BaseTool from crewai_tools.adapters.tool_collection import ToolCollection + """ MCPServer for CrewAI. @@ -103,8 +104,8 @@ class MCPServerAdapter: try: subprocess.run(["uv", "add", "mcp crewai-tools[mcp]"], check=True) - except subprocess.CalledProcessError: - raise ImportError("Failed to install mcp package") + except subprocess.CalledProcessError as e: + raise ImportError("Failed to install mcp package") from e else: raise ImportError( "`mcp` package not found, please run `uv add crewai-tools[mcp]`" @@ -112,7 +113,9 @@ class MCPServerAdapter: try: self._serverparams = serverparams - self._adapter = MCPAdapt(self._serverparams, CrewAIAdapter(), connect_timeout) + self._adapter = MCPAdapt( + self._serverparams, CrewAIAdapter(), connect_timeout + ) self.start() except Exception as e: diff --git a/packages/tools/src/crewai_tools/adapters/pdf_embedchain_adapter.py b/packages/tools/src/crewai_tools/adapters/pdf_embedchain_adapter.py deleted file mode 100644 index aa682c84f..000000000 --- a/packages/tools/src/crewai_tools/adapters/pdf_embedchain_adapter.py +++ /dev/null @@ -1,41 +0,0 @@ -from typing import Any, Optional - -from crewai_tools.tools.rag.rag_tool import Adapter - -try: - from embedchain import App - EMBEDCHAIN_AVAILABLE = True -except ImportError: - EMBEDCHAIN_AVAILABLE = False - - -class PDFEmbedchainAdapter(Adapter): - embedchain_app: Any # Will be App when embedchain is available - summarize: bool = False - src: Optional[str] = None - - def __init__(self, **data): - if not EMBEDCHAIN_AVAILABLE: - raise ImportError("embedchain is not installed. 
Please install it with `pip install crewai-tools[embedchain]`") - super().__init__(**data) - - def query(self, question: str) -> str: - where = ( - {"app_id": self.embedchain_app.config.id, "source": self.src} - if self.src - else None - ) - result, sources = self.embedchain_app.query( - question, citations=True, dry_run=(not self.summarize), where=where - ) - if self.summarize: - return result - return "\n\n".join([source[0] for source in sources]) - - def add( - self, - *args: Any, - **kwargs: Any, - ) -> None: - self.src = args[0] if args else None - self.embedchain_app.add(*args, **kwargs) diff --git a/packages/tools/src/crewai_tools/adapters/rag_adapter.py b/packages/tools/src/crewai_tools/adapters/rag_adapter.py index 78011328c..871a7defb 100644 --- a/packages/tools/src/crewai_tools/adapters/rag_adapter.py +++ b/packages/tools/src/crewai_tools/adapters/rag_adapter.py @@ -1,4 +1,4 @@ -from typing import Any, Optional +from typing import Any from crewai_tools.rag.core import RAG from crewai_tools.tools.rag.rag_tool import Adapter @@ -8,26 +8,23 @@ class RAGAdapter(Adapter): def __init__( self, collection_name: str = "crewai_knowledge_base", - persist_directory: Optional[str] = None, + persist_directory: str | None = None, embedding_model: str = "text-embedding-3-small", top_k: int = 5, - embedding_api_key: Optional[str] = None, - **embedding_kwargs + embedding_api_key: str | None = None, + **embedding_kwargs, ): super().__init__() # Prepare embedding configuration - embedding_config = { - "api_key": embedding_api_key, - **embedding_kwargs - } + embedding_config = {"api_key": embedding_api_key, **embedding_kwargs} self._adapter = RAG( collection_name=collection_name, persist_directory=persist_directory, embedding_model=embedding_model, top_k=top_k, - embedding_config=embedding_config + embedding_config=embedding_config, ) def query(self, question: str) -> str: diff --git a/packages/tools/src/crewai_tools/adapters/tool_collection.py b/packages/tools/src/crewai_tools/adapters/tool_collection.py index 291fa8f82..895e6cb23 100644 --- a/packages/tools/src/crewai_tools/adapters/tool_collection.py +++ b/packages/tools/src/crewai_tools/adapters/tool_collection.py @@ -1,7 +1,10 @@ -from typing import List, Optional, Union, TypeVar, Generic, Dict, Callable +from collections.abc import Callable +from typing import Generic, TypeVar + from crewai.tools import BaseTool -T = TypeVar('T', bound=BaseTool) +T = TypeVar("T", bound=BaseTool) + class ToolCollection(list, Generic[T]): """ @@ -18,15 +21,15 @@ class ToolCollection(list, Generic[T]): search_tool = tools["search"] """ - def __init__(self, tools: Optional[List[T]] = None): + def __init__(self, tools: list[T] | None = None): super().__init__(tools or []) - self._name_cache: Dict[str, T] = {} + self._name_cache: dict[str, T] = {} self._build_name_cache() def _build_name_cache(self) -> None: self._name_cache = {tool.name.lower(): tool for tool in self} - def __getitem__(self, key: Union[int, str]) -> T: + def __getitem__(self, key: int | str) -> T: if isinstance(key, str): return self._name_cache[key.lower()] return super().__getitem__(key) @@ -35,7 +38,7 @@ class ToolCollection(list, Generic[T]): super().append(tool) self._name_cache[tool.name.lower()] = tool - def extend(self, tools: List[T]) -> None: + def extend(self, tools: list[T]) -> None: super().extend(tools) self._build_name_cache() @@ -54,7 +57,7 @@ class ToolCollection(list, Generic[T]): del self._name_cache[tool.name.lower()] return tool - def filter_by_names(self, names: 
Optional[List[str]] = None) -> "ToolCollection[T]": + def filter_by_names(self, names: list[str] | None = None) -> "ToolCollection[T]": if names is None: return self @@ -71,4 +74,4 @@ class ToolCollection(list, Generic[T]): def clear(self) -> None: super().clear() - self._name_cache.clear() \ No newline at end of file + self._name_cache.clear() diff --git a/packages/tools/src/crewai_tools/adapters/zapier_adapter.py b/packages/tools/src/crewai_tools/adapters/zapier_adapter.py index 78c996964..c8243ec0c 100644 --- a/packages/tools/src/crewai_tools/adapters/zapier_adapter.py +++ b/packages/tools/src/crewai_tools/adapters/zapier_adapter.py @@ -1,6 +1,5 @@ -import os import logging -from typing import List +import os import requests from crewai.tools import BaseTool @@ -42,7 +41,7 @@ class ZapierActionTool(BaseTool): execute_url = f"{ACTIONS_URL}/{self.action_id}/execute/" response = requests.request( - "POST", execute_url, headers=headers, json=action_params + "POST", execute_url, headers=headers, json=action_params, timeout=30 ) response.raise_for_status() @@ -57,7 +56,7 @@ class ZapierActionsAdapter: api_key: str - def __init__(self, api_key: str = None): + def __init__(self, api_key: str | None = None): self.api_key = api_key or os.getenv("ZAPIER_API_KEY") if not self.api_key: logger.error("Zapier Actions API key is required") @@ -67,13 +66,12 @@ class ZapierActionsAdapter: headers = { "x-api-key": self.api_key, } - response = requests.request("GET", ACTIONS_URL, headers=headers) + response = requests.request("GET", ACTIONS_URL, headers=headers, timeout=30) response.raise_for_status() - response_json = response.json() - return response_json + return response.json() - def tools(self) -> List[BaseTool]: + def tools(self) -> list[BaseTool]: """Convert Zapier actions to BaseTool instances""" actions_response = self.get_zapier_actions() tools = [] diff --git a/packages/tools/src/crewai_tools/aws/__init__.py b/packages/tools/src/crewai_tools/aws/__init__.py index b2d279078..bb61b8218 100644 --- a/packages/tools/src/crewai_tools/aws/__init__.py +++ b/packages/tools/src/crewai_tools/aws/__init__.py @@ -1,16 +1,16 @@ -from .s3 import S3ReaderTool, S3WriterTool from .bedrock import ( - BedrockKBRetrieverTool, BedrockInvokeAgentTool, + BedrockKBRetrieverTool, create_browser_toolkit, create_code_interpreter_toolkit, ) +from .s3 import S3ReaderTool, S3WriterTool __all__ = [ + "BedrockInvokeAgentTool", + "BedrockKBRetrieverTool", "S3ReaderTool", "S3WriterTool", - "BedrockKBRetrieverTool", - "BedrockInvokeAgentTool", "create_browser_toolkit", - "create_code_interpreter_toolkit" + "create_code_interpreter_toolkit", ] diff --git a/packages/tools/src/crewai_tools/aws/bedrock/__init__.py b/packages/tools/src/crewai_tools/aws/bedrock/__init__.py index 58fc5bca9..2b72d3d29 100644 --- a/packages/tools/src/crewai_tools/aws/bedrock/__init__.py +++ b/packages/tools/src/crewai_tools/aws/bedrock/__init__.py @@ -1,11 +1,11 @@ -from .knowledge_base.retriever_tool import BedrockKBRetrieverTool from .agents.invoke_agent_tool import BedrockInvokeAgentTool from .browser import create_browser_toolkit from .code_interpreter import create_code_interpreter_toolkit +from .knowledge_base.retriever_tool import BedrockKBRetrieverTool __all__ = [ - "BedrockKBRetrieverTool", "BedrockInvokeAgentTool", + "BedrockKBRetrieverTool", "create_browser_toolkit", - "create_code_interpreter_toolkit" + "create_code_interpreter_toolkit", ] diff --git a/packages/tools/src/crewai_tools/aws/bedrock/agents/invoke_agent_tool.py 
b/packages/tools/src/crewai_tools/aws/bedrock/agents/invoke_agent_tool.py index 65280fe7b..1fa4d3067 100644 --- a/packages/tools/src/crewai_tools/aws/bedrock/agents/invoke_agent_tool.py +++ b/packages/tools/src/crewai_tools/aws/bedrock/agents/invoke_agent_tool.py @@ -1,12 +1,11 @@ -from typing import Type, Optional, Dict, Any, List -import os import json -import uuid +import os import time from datetime import datetime, timezone -from dotenv import load_dotenv +from typing import ClassVar from crewai.tools import BaseTool +from dotenv import load_dotenv from pydantic import BaseModel, Field from ..exceptions import BedrockAgentError, BedrockValidationError @@ -17,29 +16,30 @@ load_dotenv() class BedrockInvokeAgentToolInput(BaseModel): """Input schema for BedrockInvokeAgentTool.""" + query: str = Field(..., description="The query to send to the agent") class BedrockInvokeAgentTool(BaseTool): name: str = "Bedrock Agent Invoke Tool" description: str = "An agent responsible for policy analysis." - args_schema: Type[BaseModel] = BedrockInvokeAgentToolInput + args_schema: type[BaseModel] = BedrockInvokeAgentToolInput agent_id: str = None agent_alias_id: str = None session_id: str = None enable_trace: bool = False end_session: bool = False - package_dependencies: List[str] = ["boto3"] + package_dependencies: ClassVar[list[str]] = ["boto3"] def __init__( self, - agent_id: str = None, - agent_alias_id: str = None, - session_id: str = None, + agent_id: str | None = None, + agent_alias_id: str | None = None, + session_id: str | None = None, enable_trace: bool = False, end_session: bool = False, - description: Optional[str] = None, - **kwargs + description: str | None = None, + **kwargs, ): """Initialize the BedrockInvokeAgentTool with agent configuration. @@ -54,9 +54,11 @@ class BedrockInvokeAgentTool(BaseTool): super().__init__(**kwargs) # Get values from environment variables if not provided - self.agent_id = agent_id or os.getenv('BEDROCK_AGENT_ID') - self.agent_alias_id = agent_alias_id or os.getenv('BEDROCK_AGENT_ALIAS_ID') - self.session_id = session_id or str(int(time.time())) # Use timestamp as session ID if not provided + self.agent_id = agent_id or os.getenv("BEDROCK_AGENT_ID") + self.agent_alias_id = agent_alias_id or os.getenv("BEDROCK_AGENT_ALIAS_ID") + self.session_id = session_id or str( + int(time.time()) + ) # Use timestamp as session ID if not provided self.enable_trace = enable_trace self.end_session = end_session @@ -87,20 +89,22 @@ class BedrockInvokeAgentTool(BaseTool): raise BedrockValidationError("session_id must be a string") except BedrockValidationError as e: - raise BedrockValidationError(f"Parameter validation failed: {str(e)}") + raise BedrockValidationError(f"Parameter validation failed: {e!s}") from e def _run(self, query: str) -> str: try: import boto3 from botocore.exceptions import ClientError - except ImportError: - raise ImportError("`boto3` package not found, please run `uv add boto3`") + except ImportError as e: + raise ImportError("`boto3` package not found, please run `uv add boto3`") from e try: # Initialize the Bedrock Agent Runtime client bedrock_agent = boto3.client( "bedrock-agent-runtime", - region_name=os.getenv('AWS_REGION', os.getenv('AWS_DEFAULT_REGION', 'us-west-2')) + region_name=os.getenv( + "AWS_REGION", os.getenv("AWS_DEFAULT_REGION", "us-west-2") + ), ) # Format the prompt with current time @@ -119,28 +123,28 @@ Below is the users query or task. 
Complete it and answer it consicely and to the sessionId=self.session_id, inputText=prompt, enableTrace=self.enable_trace, - endSession=self.end_session + endSession=self.end_session, ) # Process the response completion = "" # Check if response contains a completion field - if 'completion' in response: + if "completion" in response: # Process streaming response format - for event in response.get('completion', []): - if 'chunk' in event and 'bytes' in event['chunk']: - chunk_bytes = event['chunk']['bytes'] + for event in response.get("completion", []): + if "chunk" in event and "bytes" in event["chunk"]: + chunk_bytes = event["chunk"]["bytes"] if isinstance(chunk_bytes, (bytes, bytearray)): - completion += chunk_bytes.decode('utf-8') + completion += chunk_bytes.decode("utf-8") else: completion += str(chunk_bytes) # If no completion found in streaming format, try direct format - if not completion and 'chunk' in response and 'bytes' in response['chunk']: - chunk_bytes = response['chunk']['bytes'] + if not completion and "chunk" in response and "bytes" in response["chunk"]: + chunk_bytes = response["chunk"]["bytes"] if isinstance(chunk_bytes, (bytes, bytearray)): - completion = chunk_bytes.decode('utf-8') + completion = chunk_bytes.decode("utf-8") else: completion = str(chunk_bytes) @@ -148,14 +152,16 @@ Below is the users query or task. Complete it and answer it consicely and to the if not completion: debug_info = { "error": "Could not extract completion from response", - "response_keys": list(response.keys()) + "response_keys": list(response.keys()), } # Add more debug info - if 'chunk' in response: - debug_info["chunk_keys"] = list(response['chunk'].keys()) + if "chunk" in response: + debug_info["chunk_keys"] = list(response["chunk"].keys()) - raise BedrockAgentError(f"Failed to extract completion: {json.dumps(debug_info, indent=2)}") + raise BedrockAgentError( + f"Failed to extract completion: {json.dumps(debug_info, indent=2)}" + ) return completion @@ -164,13 +170,13 @@ Below is the users query or task. 
Complete it and answer it consicely and to the error_message = str(e) # Try to extract error code if available - if hasattr(e, 'response') and 'Error' in e.response: - error_code = e.response['Error'].get('Code', 'Unknown') - error_message = e.response['Error'].get('Message', str(e)) + if hasattr(e, "response") and "Error" in e.response: + error_code = e.response["Error"].get("Code", "Unknown") + error_message = e.response["Error"].get("Message", str(e)) - raise BedrockAgentError(f"Error ({error_code}): {error_message}") + raise BedrockAgentError(f"Error ({error_code}): {error_message}") from e except BedrockAgentError: # Re-raise BedrockAgentError exceptions raise except Exception as e: - raise BedrockAgentError(f"Unexpected error: {str(e)}") \ No newline at end of file + raise BedrockAgentError(f"Unexpected error: {e!s}") from e diff --git a/packages/tools/src/crewai_tools/aws/bedrock/browser/__init__.py b/packages/tools/src/crewai_tools/aws/bedrock/browser/__init__.py index e82666ebc..63d2d4ab8 100644 --- a/packages/tools/src/crewai_tools/aws/bedrock/browser/__init__.py +++ b/packages/tools/src/crewai_tools/aws/bedrock/browser/__init__.py @@ -1,3 +1,3 @@ from .browser_toolkit import BrowserToolkit, create_browser_toolkit -__all__ = ["BrowserToolkit", "create_browser_toolkit"] \ No newline at end of file +__all__ = ["BrowserToolkit", "create_browser_toolkit"] diff --git a/packages/tools/src/crewai_tools/aws/bedrock/browser/browser_session_manager.py b/packages/tools/src/crewai_tools/aws/bedrock/browser/browser_session_manager.py index d4652c320..af6026d4b 100644 --- a/packages/tools/src/crewai_tools/aws/bedrock/browser/browser_session_manager.py +++ b/packages/tools/src/crewai_tools/aws/bedrock/browser/browser_session_manager.py @@ -1,12 +1,12 @@ from __future__ import annotations import logging -from typing import TYPE_CHECKING, Dict, Tuple +from typing import TYPE_CHECKING if TYPE_CHECKING: + from bedrock_agentcore.tools.browser_client import BrowserClient from playwright.async_api import Browser as AsyncBrowser from playwright.sync_api import Browser as SyncBrowser - from bedrock_agentcore.tools.browser_client import BrowserClient logger = logging.getLogger(__name__) @@ -28,8 +28,8 @@ class BrowserSessionManager: region: AWS region for browser client """ self.region = region - self._async_sessions: Dict[str, Tuple[BrowserClient, AsyncBrowser]] = {} - self._sync_sessions: Dict[str, Tuple[BrowserClient, SyncBrowser]] = {} + self._async_sessions: dict[str, tuple[BrowserClient, AsyncBrowser]] = {} + self._sync_sessions: dict[str, tuple[BrowserClient, SyncBrowser]] = {} async def get_async_browser(self, thread_id: str) -> AsyncBrowser: """ @@ -75,6 +75,7 @@ class BrowserSessionManager: Exception: If browser session creation fails """ from bedrock_agentcore.tools.browser_client import BrowserClient + browser_client = BrowserClient(region=self.region) try: @@ -132,6 +133,7 @@ class BrowserSessionManager: Exception: If browser session creation fails """ from bedrock_agentcore.tools.browser_client import BrowserClient + browser_client = BrowserClient(region=self.region) try: @@ -257,4 +259,4 @@ class BrowserSessionManager: for thread_id in sync_thread_ids: self.close_sync_browser(thread_id) - logger.info("All browser sessions closed") \ No newline at end of file + logger.info("All browser sessions closed") diff --git a/packages/tools/src/crewai_tools/aws/bedrock/browser/browser_toolkit.py b/packages/tools/src/crewai_tools/aws/bedrock/browser/browser_toolkit.py index 2939bbb00..a27214089 100644 
--- a/packages/tools/src/crewai_tools/aws/bedrock/browser/browser_toolkit.py +++ b/packages/tools/src/crewai_tools/aws/bedrock/browser/browser_toolkit.py @@ -1,9 +1,9 @@ """Toolkit for navigating web with AWS browser.""" +import asyncio import json import logging -import asyncio -from typing import Dict, List, Tuple, Any, Type +from typing import Any from urllib.parse import urlparse from crewai.tools import BaseTool @@ -18,78 +18,100 @@ logger = logging.getLogger(__name__) # Input schemas class NavigateToolInput(BaseModel): """Input for NavigateTool.""" + url: str = Field(description="URL to navigate to") - thread_id: str = Field(default="default", description="Thread ID for the browser session") + thread_id: str = Field( + default="default", description="Thread ID for the browser session" + ) class ClickToolInput(BaseModel): """Input for ClickTool.""" + selector: str = Field(description="CSS selector for the element to click on") - thread_id: str = Field(default="default", description="Thread ID for the browser session") + thread_id: str = Field( + default="default", description="Thread ID for the browser session" + ) class GetElementsToolInput(BaseModel): """Input for GetElementsTool.""" + selector: str = Field(description="CSS selector for elements to get") - thread_id: str = Field(default="default", description="Thread ID for the browser session") + thread_id: str = Field( + default="default", description="Thread ID for the browser session" + ) class ExtractTextToolInput(BaseModel): """Input for ExtractTextTool.""" - thread_id: str = Field(default="default", description="Thread ID for the browser session") + + thread_id: str = Field( + default="default", description="Thread ID for the browser session" + ) class ExtractHyperlinksToolInput(BaseModel): """Input for ExtractHyperlinksTool.""" - thread_id: str = Field(default="default", description="Thread ID for the browser session") + + thread_id: str = Field( + default="default", description="Thread ID for the browser session" + ) class NavigateBackToolInput(BaseModel): """Input for NavigateBackTool.""" - thread_id: str = Field(default="default", description="Thread ID for the browser session") + + thread_id: str = Field( + default="default", description="Thread ID for the browser session" + ) class CurrentWebPageToolInput(BaseModel): """Input for CurrentWebPageTool.""" - thread_id: str = Field(default="default", description="Thread ID for the browser session") + + thread_id: str = Field( + default="default", description="Thread ID for the browser session" + ) # Base tool class class BrowserBaseTool(BaseTool): """Base class for browser tools.""" - + def __init__(self, session_manager: BrowserSessionManager): """Initialize with a session manager.""" super().__init__() self._session_manager = session_manager - - if self._is_in_asyncio_loop() and hasattr(self, '_arun'): + + if self._is_in_asyncio_loop() and hasattr(self, "_arun"): self._original_run = self._run + # Override _run to use _arun when in an asyncio loop def patched_run(*args, **kwargs): try: import nest_asyncio + loop = asyncio.get_event_loop() nest_asyncio.apply(loop) return asyncio.get_event_loop().run_until_complete( self._arun(*args, **kwargs) ) except Exception as e: - return f"Error in patched _run: {str(e)}" + return f"Error in patched _run: {e!s}" + self._run = patched_run - + async def get_async_page(self, thread_id: str) -> Any: """Get or create a page for the specified thread.""" browser = await self._session_manager.get_async_browser(thread_id) - page = await 
aget_current_page(browser) - return page - + return await aget_current_page(browser) + def get_sync_page(self, thread_id: str) -> Any: """Get or create a page for the specified thread.""" browser = self._session_manager.get_sync_browser(thread_id) - page = get_current_page(browser) - return page - + return get_current_page(browser) + def _is_in_asyncio_loop(self) -> bool: """Check if we're currently in an asyncio event loop.""" try: @@ -105,8 +127,8 @@ class NavigateTool(BrowserBaseTool): name: str = "navigate_browser" description: str = "Navigate a browser to the specified URL" - args_schema: Type[BaseModel] = NavigateToolInput - + args_schema: type[BaseModel] = NavigateToolInput + def _run(self, url: str, thread_id: str = "default", **kwargs) -> str: """Use the sync tool.""" try: @@ -123,7 +145,7 @@ class NavigateTool(BrowserBaseTool): status = response.status if response else "unknown" return f"Navigating to {url} returned status code {status}" except Exception as e: - return f"Error navigating to {url}: {str(e)}" + return f"Error navigating to {url}: {e!s}" async def _arun(self, url: str, thread_id: str = "default", **kwargs) -> str: """Use the async tool.""" @@ -141,7 +163,7 @@ class NavigateTool(BrowserBaseTool): status = response.status if response else "unknown" return f"Navigating to {url} returned status code {status}" except Exception as e: - return f"Error navigating to {url}: {str(e)}" + return f"Error navigating to {url}: {e!s}" class ClickTool(BrowserBaseTool): @@ -149,8 +171,8 @@ class ClickTool(BrowserBaseTool): name: str = "click_element" description: str = "Click on an element with the given CSS selector" - args_schema: Type[BaseModel] = ClickToolInput - + args_schema: type[BaseModel] = ClickToolInput + visible_only: bool = True """Whether to consider only visible elements.""" playwright_strict: bool = False @@ -162,7 +184,7 @@ class ClickTool(BrowserBaseTool): if not self.visible_only: return selector return f"{selector} >> visible=1" - + def _run(self, selector: str, thread_id: str = "default", **kwargs) -> str: """Use the sync tool.""" try: @@ -172,7 +194,7 @@ class ClickTool(BrowserBaseTool): # Click on the element selector_effective = self._selector_effective(selector=selector) from playwright.sync_api import TimeoutError as PlaywrightTimeoutError - + try: page.click( selector_effective, @@ -182,11 +204,11 @@ class ClickTool(BrowserBaseTool): except PlaywrightTimeoutError: return f"Unable to click on element '{selector}'" except Exception as click_error: - return f"Unable to click on element '{selector}': {str(click_error)}" - + return f"Unable to click on element '{selector}': {click_error!s}" + return f"Clicked element '{selector}'" except Exception as e: - return f"Error clicking on element: {str(e)}" + return f"Error clicking on element: {e!s}" async def _arun(self, selector: str, thread_id: str = "default", **kwargs) -> str: """Use the async tool.""" @@ -197,7 +219,7 @@ class ClickTool(BrowserBaseTool): # Click on the element selector_effective = self._selector_effective(selector=selector) from playwright.async_api import TimeoutError as PlaywrightTimeoutError - + try: await page.click( selector_effective, @@ -207,19 +229,20 @@ class ClickTool(BrowserBaseTool): except PlaywrightTimeoutError: return f"Unable to click on element '{selector}'" except Exception as click_error: - return f"Unable to click on element '{selector}': {str(click_error)}" - + return f"Unable to click on element '{selector}': {click_error!s}" + return f"Clicked element '{selector}'" except 
Exception as e: - return f"Error clicking on element: {str(e)}" + return f"Error clicking on element: {e!s}" class NavigateBackTool(BrowserBaseTool): """Tool for navigating back in browser history.""" + name: str = "navigate_back" description: str = "Navigate back to the previous page" - args_schema: Type[BaseModel] = NavigateBackToolInput - + args_schema: type[BaseModel] = NavigateBackToolInput + def _run(self, thread_id: str = "default", **kwargs) -> str: """Use the sync tool.""" try: @@ -231,9 +254,9 @@ class NavigateBackTool(BrowserBaseTool): page.go_back() return "Navigated back to the previous page" except Exception as nav_error: - return f"Unable to navigate back: {str(nav_error)}" + return f"Unable to navigate back: {nav_error!s}" except Exception as e: - return f"Error navigating back: {str(e)}" + return f"Error navigating back: {e!s}" async def _arun(self, thread_id: str = "default", **kwargs) -> str: """Use the async tool.""" @@ -246,17 +269,18 @@ class NavigateBackTool(BrowserBaseTool): await page.go_back() return "Navigated back to the previous page" except Exception as nav_error: - return f"Unable to navigate back: {str(nav_error)}" + return f"Unable to navigate back: {nav_error!s}" except Exception as e: - return f"Error navigating back: {str(e)}" + return f"Error navigating back: {e!s}" class ExtractTextTool(BrowserBaseTool): """Tool for extracting text from a webpage.""" + name: str = "extract_text" description: str = "Extract all the text on the current webpage" - args_schema: Type[BaseModel] = ExtractTextToolInput - + args_schema: type[BaseModel] = ExtractTextToolInput + def _run(self, thread_id: str = "default", **kwargs) -> str: """Use the sync tool.""" try: @@ -268,7 +292,7 @@ class ExtractTextTool(BrowserBaseTool): "The 'beautifulsoup4' package is required to use this tool." " Please install it with 'pip install beautifulsoup4'." ) - + # Get the current page page = self.get_sync_page(thread_id) @@ -277,7 +301,7 @@ class ExtractTextTool(BrowserBaseTool): soup = BeautifulSoup(content, "html.parser") return soup.get_text(separator="\n").strip() except Exception as e: - return f"Error extracting text: {str(e)}" + return f"Error extracting text: {e!s}" async def _arun(self, thread_id: str = "default", **kwargs) -> str: """Use the async tool.""" @@ -290,7 +314,7 @@ class ExtractTextTool(BrowserBaseTool): "The 'beautifulsoup4' package is required to use this tool." " Please install it with 'pip install beautifulsoup4'." ) - + # Get the current page page = await self.get_async_page(thread_id) @@ -299,15 +323,16 @@ class ExtractTextTool(BrowserBaseTool): soup = BeautifulSoup(content, "html.parser") return soup.get_text(separator="\n").strip() except Exception as e: - return f"Error extracting text: {str(e)}" + return f"Error extracting text: {e!s}" class ExtractHyperlinksTool(BrowserBaseTool): """Tool for extracting hyperlinks from a webpage.""" + name: str = "extract_hyperlinks" description: str = "Extract all hyperlinks on the current webpage" - args_schema: Type[BaseModel] = ExtractHyperlinksToolInput - + args_schema: type[BaseModel] = ExtractHyperlinksToolInput + def _run(self, thread_id: str = "default", **kwargs) -> str: """Use the sync tool.""" try: @@ -319,7 +344,7 @@ class ExtractHyperlinksTool(BrowserBaseTool): "The 'beautifulsoup4' package is required to use this tool." " Please install it with 'pip install beautifulsoup4'." 
) - + # Get the current page page = self.get_sync_page(thread_id) @@ -330,15 +355,15 @@ class ExtractHyperlinksTool(BrowserBaseTool): for link in soup.find_all("a", href=True): text = link.get_text().strip() href = link["href"] - if href.startswith("http") or href.startswith("https"): + if href.startswith(("http", "https")): links.append({"text": text, "url": href}) - + if not links: return "No hyperlinks found on the current page." - + return json.dumps(links, indent=2) except Exception as e: - return f"Error extracting hyperlinks: {str(e)}" + return f"Error extracting hyperlinks: {e!s}" async def _arun(self, thread_id: str = "default", **kwargs) -> str: """Use the async tool.""" @@ -351,7 +376,7 @@ class ExtractHyperlinksTool(BrowserBaseTool): "The 'beautifulsoup4' package is required to use this tool." " Please install it with 'pip install beautifulsoup4'." ) - + # Get the current page page = await self.get_async_page(thread_id) @@ -362,23 +387,24 @@ class ExtractHyperlinksTool(BrowserBaseTool): for link in soup.find_all("a", href=True): text = link.get_text().strip() href = link["href"] - if href.startswith("http") or href.startswith("https"): + if href.startswith(("http", "https")): links.append({"text": text, "url": href}) - + if not links: return "No hyperlinks found on the current page." - + return json.dumps(links, indent=2) except Exception as e: - return f"Error extracting hyperlinks: {str(e)}" + return f"Error extracting hyperlinks: {e!s}" class GetElementsTool(BrowserBaseTool): """Tool for getting elements from a webpage.""" + name: str = "get_elements" description: str = "Get elements from the webpage using a CSS selector" - args_schema: Type[BaseModel] = GetElementsToolInput - + args_schema: type[BaseModel] = GetElementsToolInput + def _run(self, selector: str, thread_id: str = "default", **kwargs) -> str: """Use the sync tool.""" try: @@ -389,15 +415,15 @@ class GetElementsTool(BrowserBaseTool): elements = page.query_selector_all(selector) if not elements: return f"No elements found with selector '{selector}'" - + elements_text = [] for i, element in enumerate(elements): text = element.text_content() - elements_text.append(f"Element {i+1}: {text.strip()}") - + elements_text.append(f"Element {i + 1}: {text.strip()}") + return "\n".join(elements_text) except Exception as e: - return f"Error getting elements: {str(e)}" + return f"Error getting elements: {e!s}" async def _arun(self, selector: str, thread_id: str = "default", **kwargs) -> str: """Use the async tool.""" @@ -409,23 +435,24 @@ class GetElementsTool(BrowserBaseTool): elements = await page.query_selector_all(selector) if not elements: return f"No elements found with selector '{selector}'" - + elements_text = [] for i, element in enumerate(elements): text = await element.text_content() - elements_text.append(f"Element {i+1}: {text.strip()}") - + elements_text.append(f"Element {i + 1}: {text.strip()}") + return "\n".join(elements_text) except Exception as e: - return f"Error getting elements: {str(e)}" + return f"Error getting elements: {e!s}" class CurrentWebPageTool(BrowserBaseTool): """Tool for getting information about the current webpage.""" + name: str = "current_webpage" description: str = "Get information about the current webpage" - args_schema: Type[BaseModel] = CurrentWebPageToolInput - + args_schema: type[BaseModel] = CurrentWebPageToolInput + def _run(self, thread_id: str = "default", **kwargs) -> str: """Use the sync tool.""" try: @@ -437,7 +464,7 @@ class CurrentWebPageTool(BrowserBaseTool): title = 
page.title() return f"URL: {url}\nTitle: {title}" except Exception as e: - return f"Error getting current webpage info: {str(e)}" + return f"Error getting current webpage info: {e!s}" async def _arun(self, thread_id: str = "default", **kwargs) -> str: """Use the async tool.""" @@ -450,7 +477,7 @@ class CurrentWebPageTool(BrowserBaseTool): title = await page.title() return f"URL: {url}\nTitle: {title}" except Exception as e: - return f"Error getting current webpage info: {str(e)}" + return f"Error getting current webpage info: {e!s}" class BrowserToolkit: @@ -504,10 +531,10 @@ class BrowserToolkit: """ self.region = region self.session_manager = BrowserSessionManager(region=region) - self.tools: List[BaseTool] = [] + self.tools: list[BaseTool] = [] self._nest_current_loop() self._setup_tools() - + def _nest_current_loop(self): """Apply nest_asyncio if we're in an asyncio loop.""" try: @@ -515,9 +542,10 @@ class BrowserToolkit: if loop.is_running(): try: import nest_asyncio + nest_asyncio.apply(loop) except Exception as e: - logger.warning(f"Failed to apply nest_asyncio: {str(e)}") + logger.warning(f"Failed to apply nest_asyncio: {e!s}") except RuntimeError: pass @@ -530,10 +558,10 @@ class BrowserToolkit: ExtractTextTool(session_manager=self.session_manager), ExtractHyperlinksTool(session_manager=self.session_manager), GetElementsTool(session_manager=self.session_manager), - CurrentWebPageTool(session_manager=self.session_manager) + CurrentWebPageTool(session_manager=self.session_manager), ] - def get_tools(self) -> List[BaseTool]: + def get_tools(self) -> list[BaseTool]: """ Get the list of browser tools @@ -542,7 +570,7 @@ class BrowserToolkit: """ return self.tools - def get_tools_by_name(self) -> Dict[str, BaseTool]: + def get_tools_by_name(self) -> dict[str, BaseTool]: """ Get a dictionary of tools mapped by their names @@ -555,11 +583,11 @@ class BrowserToolkit: """Clean up all browser sessions asynchronously""" await self.session_manager.close_all_browsers() logger.info("All browser sessions cleaned up") - + def sync_cleanup(self) -> None: """Clean up all browser sessions from synchronous code""" import asyncio - + try: loop = asyncio.get_event_loop() if loop.is_running(): @@ -572,7 +600,7 @@ class BrowserToolkit: def create_browser_toolkit( region: str = "us-west-2", -) -> Tuple[BrowserToolkit, List[BaseTool]]: +) -> tuple[BrowserToolkit, list[BaseTool]]: """ Create a BrowserToolkit diff --git a/packages/tools/src/crewai_tools/aws/bedrock/browser/utils.py b/packages/tools/src/crewai_tools/aws/bedrock/browser/utils.py index 6e8b48e3a..1369f9d14 100644 --- a/packages/tools/src/crewai_tools/aws/bedrock/browser/utils.py +++ b/packages/tools/src/crewai_tools/aws/bedrock/browser/utils.py @@ -1,6 +1,6 @@ from __future__ import annotations -from typing import TYPE_CHECKING, Any, Union +from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from playwright.async_api import Browser as AsyncBrowser @@ -9,7 +9,7 @@ if TYPE_CHECKING: from playwright.sync_api import Page as SyncPage -async def aget_current_page(browser: Union[AsyncBrowser, Any]) -> AsyncPage: +async def aget_current_page(browser: AsyncBrowser | Any) -> AsyncPage: """ Asynchronously get the current page of the browser. Args: @@ -26,7 +26,7 @@ async def aget_current_page(browser: Union[AsyncBrowser, Any]) -> AsyncPage: return context.pages[-1] -def get_current_page(browser: Union[SyncBrowser, Any]) -> SyncPage: +def get_current_page(browser: SyncBrowser | Any) -> SyncPage: """ Get the current page of the browser. 
Args: @@ -40,4 +40,4 @@ def get_current_page(browser: Union[SyncBrowser, Any]) -> SyncPage: context = browser.contexts[0] if not context.pages: return context.new_page() - return context.pages[-1] \ No newline at end of file + return context.pages[-1] diff --git a/packages/tools/src/crewai_tools/aws/bedrock/code_interpreter/__init__.py b/packages/tools/src/crewai_tools/aws/bedrock/code_interpreter/__init__.py index 903c84e24..220275d27 100644 --- a/packages/tools/src/crewai_tools/aws/bedrock/code_interpreter/__init__.py +++ b/packages/tools/src/crewai_tools/aws/bedrock/code_interpreter/__init__.py @@ -1,3 +1,6 @@ -from .code_interpreter_toolkit import CodeInterpreterToolkit, create_code_interpreter_toolkit +from .code_interpreter_toolkit import ( + CodeInterpreterToolkit, + create_code_interpreter_toolkit, +) -__all__ = ["CodeInterpreterToolkit", "create_code_interpreter_toolkit"] \ No newline at end of file +__all__ = ["CodeInterpreterToolkit", "create_code_interpreter_toolkit"] diff --git a/packages/tools/src/crewai_tools/aws/bedrock/code_interpreter/code_interpreter_toolkit.py b/packages/tools/src/crewai_tools/aws/bedrock/code_interpreter/code_interpreter_toolkit.py index 4e697cafe..2ebcedf25 100644 --- a/packages/tools/src/crewai_tools/aws/bedrock/code_interpreter/code_interpreter_toolkit.py +++ b/packages/tools/src/crewai_tools/aws/bedrock/code_interpreter/code_interpreter_toolkit.py @@ -1,9 +1,10 @@ """Toolkit for working with AWS Bedrock Code Interpreter.""" + from __future__ import annotations import json import logging -from typing import TYPE_CHECKING, Dict, List, Tuple, Optional, Type, Any +from typing import TYPE_CHECKING, Any from crewai.tools import BaseTool from pydantic import BaseModel, Field @@ -39,124 +40,184 @@ def extract_output_from_stream(response): output.append(f"==== File: {file_path} ====\n{file_content}\n") else: output.append(json.dumps(resource)) - + return "\n".join(output) # Input schemas class ExecuteCodeInput(BaseModel): """Input for ExecuteCode.""" + code: str = Field(description="The code to execute") - language: str = Field(default="python", description="The programming language of the code") - clear_context: bool = Field(default=False, description="Whether to clear execution context") - thread_id: str = Field(default="default", description="Thread ID for the code interpreter session") + language: str = Field( + default="python", description="The programming language of the code" + ) + clear_context: bool = Field( + default=False, description="Whether to clear execution context" + ) + thread_id: str = Field( + default="default", description="Thread ID for the code interpreter session" + ) class ExecuteCommandInput(BaseModel): """Input for ExecuteCommand.""" + command: str = Field(description="The command to execute") - thread_id: str = Field(default="default", description="Thread ID for the code interpreter session") + thread_id: str = Field( + default="default", description="Thread ID for the code interpreter session" + ) class ReadFilesInput(BaseModel): """Input for ReadFiles.""" - paths: List[str] = Field(description="List of file paths to read") - thread_id: str = Field(default="default", description="Thread ID for the code interpreter session") + + paths: list[str] = Field(description="List of file paths to read") + thread_id: str = Field( + default="default", description="Thread ID for the code interpreter session" + ) class ListFilesInput(BaseModel): """Input for ListFiles.""" + directory_path: str = Field(default="", description="Path to the 
directory to list") - thread_id: str = Field(default="default", description="Thread ID for the code interpreter session") + thread_id: str = Field( + default="default", description="Thread ID for the code interpreter session" + ) class DeleteFilesInput(BaseModel): """Input for DeleteFiles.""" - paths: List[str] = Field(description="List of file paths to delete") - thread_id: str = Field(default="default", description="Thread ID for the code interpreter session") + + paths: list[str] = Field(description="List of file paths to delete") + thread_id: str = Field( + default="default", description="Thread ID for the code interpreter session" + ) class WriteFilesInput(BaseModel): """Input for WriteFiles.""" - files: List[Dict[str, str]] = Field(description="List of dictionaries with path and text fields") - thread_id: str = Field(default="default", description="Thread ID for the code interpreter session") + + files: list[dict[str, str]] = Field( + description="List of dictionaries with path and text fields" + ) + thread_id: str = Field( + default="default", description="Thread ID for the code interpreter session" + ) class StartCommandInput(BaseModel): """Input for StartCommand.""" + command: str = Field(description="The command to execute asynchronously") - thread_id: str = Field(default="default", description="Thread ID for the code interpreter session") + thread_id: str = Field( + default="default", description="Thread ID for the code interpreter session" + ) class GetTaskInput(BaseModel): """Input for GetTask.""" + task_id: str = Field(description="The ID of the task to check") - thread_id: str = Field(default="default", description="Thread ID for the code interpreter session") + thread_id: str = Field( + default="default", description="Thread ID for the code interpreter session" + ) class StopTaskInput(BaseModel): """Input for StopTask.""" + task_id: str = Field(description="The ID of the task to stop") - thread_id: str = Field(default="default", description="Thread ID for the code interpreter session") + thread_id: str = Field( + default="default", description="Thread ID for the code interpreter session" + ) # Tool classes class ExecuteCodeTool(BaseTool): """Tool for executing code in various languages.""" + name: str = "execute_code" description: str = "Execute code in various languages (primarily Python)" - args_schema: Type[BaseModel] = ExecuteCodeInput + args_schema: type[BaseModel] = ExecuteCodeInput toolkit: Any = Field(default=None, exclude=True) - + def __init__(self, toolkit): super().__init__() self.toolkit = toolkit - - def _run(self, code: str, language: str = "python", clear_context: bool = False, thread_id: str = "default") -> str: + + def _run( + self, + code: str, + language: str = "python", + clear_context: bool = False, + thread_id: str = "default", + ) -> str: try: # Get or create code interpreter - code_interpreter = self.toolkit._get_or_create_interpreter(thread_id=thread_id) - + code_interpreter = self.toolkit._get_or_create_interpreter( + thread_id=thread_id + ) + # Execute code response = code_interpreter.invoke( method="executeCode", - params={"code": code, "language": language, "clearContext": clear_context}, + params={ + "code": code, + "language": language, + "clearContext": clear_context, + }, ) - + return extract_output_from_stream(response) except Exception as e: - return f"Error executing code: {str(e)}" - - async def _arun(self, code: str, language: str = "python", clear_context: bool = False, thread_id: str = "default") -> str: + return f"Error executing 
code: {e!s}" + + async def _arun( + self, + code: str, + language: str = "python", + clear_context: bool = False, + thread_id: str = "default", + ) -> str: # Use _run as we're working with a synchronous API that's thread-safe - return self._run(code=code, language=language, clear_context=clear_context, thread_id=thread_id) + return self._run( + code=code, + language=language, + clear_context=clear_context, + thread_id=thread_id, + ) class ExecuteCommandTool(BaseTool): """Tool for running shell commands in the code interpreter environment.""" + name: str = "execute_command" description: str = "Run shell commands in the code interpreter environment" - args_schema: Type[BaseModel] = ExecuteCommandInput + args_schema: type[BaseModel] = ExecuteCommandInput toolkit: Any = Field(default=None, exclude=True) - + def __init__(self, toolkit): super().__init__() self.toolkit = toolkit - + def _run(self, command: str, thread_id: str = "default") -> str: try: # Get or create code interpreter - code_interpreter = self.toolkit._get_or_create_interpreter(thread_id=thread_id) - + code_interpreter = self.toolkit._get_or_create_interpreter( + thread_id=thread_id + ) + # Execute command response = code_interpreter.invoke( method="executeCommand", params={"command": command} ) - + return extract_output_from_stream(response) except Exception as e: - return f"Error executing command: {str(e)}" - + return f"Error executing command: {e!s}" + async def _arun(self, command: str, thread_id: str = "default") -> str: # Use _run as we're working with a synchronous API that's thread-safe return self._run(command=command, thread_id=thread_id) @@ -164,57 +225,65 @@ class ExecuteCommandTool(BaseTool): class ReadFilesTool(BaseTool): """Tool for reading content of files in the environment.""" + name: str = "read_files" description: str = "Read content of files in the environment" - args_schema: Type[BaseModel] = ReadFilesInput + args_schema: type[BaseModel] = ReadFilesInput toolkit: Any = Field(default=None, exclude=True) - + def __init__(self, toolkit): super().__init__() self.toolkit = toolkit - - def _run(self, paths: List[str], thread_id: str = "default") -> str: + + def _run(self, paths: list[str], thread_id: str = "default") -> str: try: # Get or create code interpreter - code_interpreter = self.toolkit._get_or_create_interpreter(thread_id=thread_id) - + code_interpreter = self.toolkit._get_or_create_interpreter( + thread_id=thread_id + ) + # Read files - response = code_interpreter.invoke(method="readFiles", params={"paths": paths}) - + response = code_interpreter.invoke( + method="readFiles", params={"paths": paths} + ) + return extract_output_from_stream(response) except Exception as e: - return f"Error reading files: {str(e)}" - - async def _arun(self, paths: List[str], thread_id: str = "default") -> str: + return f"Error reading files: {e!s}" + + async def _arun(self, paths: list[str], thread_id: str = "default") -> str: # Use _run as we're working with a synchronous API that's thread-safe return self._run(paths=paths, thread_id=thread_id) class ListFilesTool(BaseTool): """Tool for listing files in directories in the environment.""" + name: str = "list_files" description: str = "List files in directories in the environment" - args_schema: Type[BaseModel] = ListFilesInput + args_schema: type[BaseModel] = ListFilesInput toolkit: Any = Field(default=None, exclude=True) - + def __init__(self, toolkit): super().__init__() self.toolkit = toolkit - + def _run(self, directory_path: str = "", thread_id: str = "default") -> 
str: try: # Get or create code interpreter - code_interpreter = self.toolkit._get_or_create_interpreter(thread_id=thread_id) - + code_interpreter = self.toolkit._get_or_create_interpreter( + thread_id=thread_id + ) + # List files response = code_interpreter.invoke( method="listFiles", params={"directoryPath": directory_path} ) - + return extract_output_from_stream(response) except Exception as e: - return f"Error listing files: {str(e)}" - + return f"Error listing files: {e!s}" + async def _arun(self, directory_path: str = "", thread_id: str = "default") -> str: # Use _run as we're working with a synchronous API that's thread-safe return self._run(directory_path=directory_path, thread_id=thread_id) @@ -222,89 +291,100 @@ class ListFilesTool(BaseTool): class DeleteFilesTool(BaseTool): """Tool for removing files from the environment.""" + name: str = "delete_files" description: str = "Remove files from the environment" - args_schema: Type[BaseModel] = DeleteFilesInput + args_schema: type[BaseModel] = DeleteFilesInput toolkit: Any = Field(default=None, exclude=True) - + def __init__(self, toolkit): super().__init__() self.toolkit = toolkit - - def _run(self, paths: List[str], thread_id: str = "default") -> str: + + def _run(self, paths: list[str], thread_id: str = "default") -> str: try: # Get or create code interpreter - code_interpreter = self.toolkit._get_or_create_interpreter(thread_id=thread_id) - + code_interpreter = self.toolkit._get_or_create_interpreter( + thread_id=thread_id + ) + # Remove files response = code_interpreter.invoke( method="removeFiles", params={"paths": paths} ) - + return extract_output_from_stream(response) except Exception as e: - return f"Error deleting files: {str(e)}" - - async def _arun(self, paths: List[str], thread_id: str = "default") -> str: + return f"Error deleting files: {e!s}" + + async def _arun(self, paths: list[str], thread_id: str = "default") -> str: # Use _run as we're working with a synchronous API that's thread-safe return self._run(paths=paths, thread_id=thread_id) class WriteFilesTool(BaseTool): """Tool for creating or updating files in the environment.""" + name: str = "write_files" description: str = "Create or update files in the environment" - args_schema: Type[BaseModel] = WriteFilesInput + args_schema: type[BaseModel] = WriteFilesInput toolkit: Any = Field(default=None, exclude=True) - + def __init__(self, toolkit): super().__init__() self.toolkit = toolkit - - def _run(self, files: List[Dict[str, str]], thread_id: str = "default") -> str: + + def _run(self, files: list[dict[str, str]], thread_id: str = "default") -> str: try: # Get or create code interpreter - code_interpreter = self.toolkit._get_or_create_interpreter(thread_id=thread_id) - + code_interpreter = self.toolkit._get_or_create_interpreter( + thread_id=thread_id + ) + # Write files response = code_interpreter.invoke( method="writeFiles", params={"content": files} ) - + return extract_output_from_stream(response) except Exception as e: - return f"Error writing files: {str(e)}" - - async def _arun(self, files: List[Dict[str, str]], thread_id: str = "default") -> str: + return f"Error writing files: {e!s}" + + async def _arun( + self, files: list[dict[str, str]], thread_id: str = "default" + ) -> str: # Use _run as we're working with a synchronous API that's thread-safe return self._run(files=files, thread_id=thread_id) class StartCommandTool(BaseTool): """Tool for starting long-running commands asynchronously.""" + name: str = "start_command_execution" description: str = 
"Start long-running commands asynchronously" - args_schema: Type[BaseModel] = StartCommandInput + args_schema: type[BaseModel] = StartCommandInput toolkit: Any = Field(default=None, exclude=True) - + def __init__(self, toolkit): super().__init__() self.toolkit = toolkit - + def _run(self, command: str, thread_id: str = "default") -> str: try: # Get or create code interpreter - code_interpreter = self.toolkit._get_or_create_interpreter(thread_id=thread_id) - + code_interpreter = self.toolkit._get_or_create_interpreter( + thread_id=thread_id + ) + # Start command execution response = code_interpreter.invoke( method="startCommandExecution", params={"command": command} ) - + return extract_output_from_stream(response) except Exception as e: - return f"Error starting command: {str(e)}" - + return f"Error starting command: {e!s}" + async def _arun(self, command: str, thread_id: str = "default") -> str: # Use _run as we're working with a synchronous API that's thread-safe return self._run(command=command, thread_id=thread_id) @@ -312,27 +392,32 @@ class StartCommandTool(BaseTool): class GetTaskTool(BaseTool): """Tool for checking status of async tasks.""" + name: str = "get_task" description: str = "Check status of async tasks" - args_schema: Type[BaseModel] = GetTaskInput + args_schema: type[BaseModel] = GetTaskInput toolkit: Any = Field(default=None, exclude=True) - + def __init__(self, toolkit): super().__init__() self.toolkit = toolkit - + def _run(self, task_id: str, thread_id: str = "default") -> str: try: # Get or create code interpreter - code_interpreter = self.toolkit._get_or_create_interpreter(thread_id=thread_id) - + code_interpreter = self.toolkit._get_or_create_interpreter( + thread_id=thread_id + ) + # Get task status - response = code_interpreter.invoke(method="getTask", params={"taskId": task_id}) - + response = code_interpreter.invoke( + method="getTask", params={"taskId": task_id} + ) + return extract_output_from_stream(response) except Exception as e: - return f"Error getting task status: {str(e)}" - + return f"Error getting task status: {e!s}" + async def _arun(self, task_id: str, thread_id: str = "default") -> str: # Use _run as we're working with a synchronous API that's thread-safe return self._run(task_id=task_id, thread_id=thread_id) @@ -340,29 +425,32 @@ class GetTaskTool(BaseTool): class StopTaskTool(BaseTool): """Tool for stopping running tasks.""" + name: str = "stop_task" description: str = "Stop running tasks" - args_schema: Type[BaseModel] = StopTaskInput + args_schema: type[BaseModel] = StopTaskInput toolkit: Any = Field(default=None, exclude=True) - + def __init__(self, toolkit): super().__init__() self.toolkit = toolkit - + def _run(self, task_id: str, thread_id: str = "default") -> str: try: # Get or create code interpreter - code_interpreter = self.toolkit._get_or_create_interpreter(thread_id=thread_id) - + code_interpreter = self.toolkit._get_or_create_interpreter( + thread_id=thread_id + ) + # Stop task response = code_interpreter.invoke( method="stopTask", params={"taskId": task_id} ) - + return extract_output_from_stream(response) except Exception as e: - return f"Error stopping task: {str(e)}" - + return f"Error stopping task: {e!s}" + async def _arun(self, task_id: str, thread_id: str = "default") -> str: # Use _run as we're working with a synchronous API that's thread-safe return self._run(task_id=task_id, thread_id=thread_id) @@ -429,8 +517,8 @@ class CodeInterpreterToolkit: region: AWS region for the code interpreter """ self.region = region - 
self._code_interpreters: Dict[str, CodeInterpreter] = {} - self.tools: List[BaseTool] = [] + self._code_interpreters: dict[str, CodeInterpreter] = {} + self.tools: list[BaseTool] = [] self._setup_tools() def _setup_tools(self) -> None: @@ -444,17 +532,15 @@ class CodeInterpreterToolkit: WriteFilesTool(self), StartCommandTool(self), GetTaskTool(self), - StopTaskTool(self) + StopTaskTool(self), ] - def _get_or_create_interpreter( - self, thread_id: str = "default" - ) -> CodeInterpreter: + def _get_or_create_interpreter(self, thread_id: str = "default") -> CodeInterpreter: """Get or create a code interpreter for the specified thread. - + Args: thread_id: Thread ID for the code interpreter session - + Returns: CodeInterpreter instance """ @@ -463,6 +549,7 @@ class CodeInterpreterToolkit: # Create a new code interpreter for this thread from bedrock_agentcore.tools.code_interpreter_client import CodeInterpreter + code_interpreter = CodeInterpreter(region=self.region) code_interpreter.start() logger.info( @@ -473,8 +560,7 @@ class CodeInterpreterToolkit: self._code_interpreters[thread_id] = code_interpreter return code_interpreter - - def get_tools(self) -> List[BaseTool]: + def get_tools(self) -> list[BaseTool]: """ Get the list of code interpreter tools @@ -483,7 +569,7 @@ class CodeInterpreterToolkit: """ return self.tools - def get_tools_by_name(self) -> Dict[str, BaseTool]: + def get_tools_by_name(self) -> dict[str, BaseTool]: """ Get a dictionary of tools mapped by their names @@ -492,9 +578,9 @@ class CodeInterpreterToolkit: """ return {tool.name: tool for tool in self.tools} - async def cleanup(self, thread_id: Optional[str] = None) -> None: + async def cleanup(self, thread_id: str | None = None) -> None: """Clean up resources - + Args: thread_id: Optional thread ID to clean up. If None, cleans up all sessions. 
""" @@ -521,14 +607,14 @@ class CodeInterpreterToolkit: logger.warning( f"Error stopping code interpreter for thread {tid}: {e}" ) - + self._code_interpreters = {} logger.info("All code interpreter sessions cleaned up") def create_code_interpreter_toolkit( region: str = "us-west-2", -) -> Tuple[CodeInterpreterToolkit, List[BaseTool]]: +) -> tuple[CodeInterpreterToolkit, list[BaseTool]]: """ Create a CodeInterpreterToolkit @@ -540,4 +626,4 @@ def create_code_interpreter_toolkit( """ toolkit = CodeInterpreterToolkit(region=region) tools = toolkit.get_tools() - return toolkit, tools \ No newline at end of file + return toolkit, tools diff --git a/packages/tools/src/crewai_tools/aws/bedrock/exceptions.py b/packages/tools/src/crewai_tools/aws/bedrock/exceptions.py index d1aa2623c..4c61a185a 100644 --- a/packages/tools/src/crewai_tools/aws/bedrock/exceptions.py +++ b/packages/tools/src/crewai_tools/aws/bedrock/exceptions.py @@ -1,17 +1,17 @@ """Custom exceptions for AWS Bedrock integration.""" + class BedrockError(Exception): """Base exception for Bedrock-related errors.""" - pass + class BedrockAgentError(BedrockError): """Exception raised for errors in the Bedrock Agent operations.""" - pass + class BedrockKnowledgeBaseError(BedrockError): """Exception raised for errors in the Bedrock Knowledge Base operations.""" - pass + class BedrockValidationError(BedrockError): """Exception raised for validation errors in Bedrock operations.""" - pass \ No newline at end of file diff --git a/packages/tools/src/crewai_tools/aws/bedrock/knowledge_base/retriever_tool.py b/packages/tools/src/crewai_tools/aws/bedrock/knowledge_base/retriever_tool.py index 06fd3ce38..eb30967e4 100644 --- a/packages/tools/src/crewai_tools/aws/bedrock/knowledge_base/retriever_tool.py +++ b/packages/tools/src/crewai_tools/aws/bedrock/knowledge_base/retriever_tool.py @@ -1,9 +1,9 @@ -from typing import Type, Optional, List, Dict, Any -import os import json -from dotenv import load_dotenv +import os +from typing import Any from crewai.tools import BaseTool +from dotenv import load_dotenv from pydantic import BaseModel, Field from ..exceptions import BedrockKnowledgeBaseError, BedrockValidationError @@ -14,28 +14,33 @@ load_dotenv() class BedrockKBRetrieverToolInput(BaseModel): """Input schema for BedrockKBRetrieverTool.""" - query: str = Field(..., description="The query to retrieve information from the knowledge base") + + query: str = Field( + ..., description="The query to retrieve information from the knowledge base" + ) class BedrockKBRetrieverTool(BaseTool): name: str = "Bedrock Knowledge Base Retriever Tool" - description: str = "Retrieves information from an Amazon Bedrock Knowledge Base given a query" - args_schema: Type[BaseModel] = BedrockKBRetrieverToolInput + description: str = ( + "Retrieves information from an Amazon Bedrock Knowledge Base given a query" + ) + args_schema: type[BaseModel] = BedrockKBRetrieverToolInput knowledge_base_id: str = None - number_of_results: Optional[int] = 5 - retrieval_configuration: Optional[Dict[str, Any]] = None - guardrail_configuration: Optional[Dict[str, Any]] = None - next_token: Optional[str] = None - package_dependencies: List[str] = ["boto3"] + number_of_results: int | None = 5 + retrieval_configuration: dict[str, Any] | None = None + guardrail_configuration: dict[str, Any] | None = None + next_token: str | None = None + package_dependencies: list[str] = ["boto3"] def __init__( self, - knowledge_base_id: str = None, - number_of_results: Optional[int] = 5, - 
retrieval_configuration: Optional[Dict[str, Any]] = None, - guardrail_configuration: Optional[Dict[str, Any]] = None, - next_token: Optional[str] = None, - **kwargs + knowledge_base_id: str | None = None, + number_of_results: int | None = 5, + retrieval_configuration: dict[str, Any] | None = None, + guardrail_configuration: dict[str, Any] | None = None, + next_token: str | None = None, + **kwargs, ): """Initialize the BedrockKBRetrieverTool with knowledge base configuration. @@ -49,7 +54,7 @@ class BedrockKBRetrieverTool(BaseTool): super().__init__(**kwargs) # Get knowledge_base_id from environment variable if not provided - self.knowledge_base_id = knowledge_base_id or os.getenv('BEDROCK_KB_ID') + self.knowledge_base_id = knowledge_base_id or os.getenv("BEDROCK_KB_ID") self.number_of_results = number_of_results self.guardrail_configuration = guardrail_configuration self.next_token = next_token @@ -66,7 +71,7 @@ class BedrockKBRetrieverTool(BaseTool): # Update the description to include the knowledge base details self.description = f"Retrieves information from Amazon Bedrock Knowledge Base '{self.knowledge_base_id}' given a query" - def _build_retrieval_configuration(self) -> Dict[str, Any]: + def _build_retrieval_configuration(self) -> dict[str, Any]: """Build the retrieval configuration based on provided parameters. Returns: @@ -89,17 +94,23 @@ class BedrockKBRetrieverTool(BaseTool): if not isinstance(self.knowledge_base_id, str): raise BedrockValidationError("knowledge_base_id must be a string") if len(self.knowledge_base_id) > 10: - raise BedrockValidationError("knowledge_base_id must be 10 characters or less") + raise BedrockValidationError( + "knowledge_base_id must be 10 characters or less" + ) if not all(c.isalnum() for c in self.knowledge_base_id): - raise BedrockValidationError("knowledge_base_id must contain only alphanumeric characters") + raise BedrockValidationError( + "knowledge_base_id must contain only alphanumeric characters" + ) # Validate next_token if provided if self.next_token: if not isinstance(self.next_token, str): raise BedrockValidationError("next_token must be a string") if len(self.next_token) < 1 or len(self.next_token) > 2048: - raise BedrockValidationError("next_token must be between 1 and 2048 characters") - if ' ' in self.next_token: + raise BedrockValidationError( + "next_token must be between 1 and 2048 characters" + ) + if " " in self.next_token: raise BedrockValidationError("next_token cannot contain spaces") # Validate number_of_results if provided @@ -107,12 +118,14 @@ class BedrockKBRetrieverTool(BaseTool): if not isinstance(self.number_of_results, int): raise BedrockValidationError("number_of_results must be an integer") if self.number_of_results < 1: - raise BedrockValidationError("number_of_results must be greater than 0") + raise BedrockValidationError( + "number_of_results must be greater than 0" + ) except BedrockValidationError as e: - raise BedrockValidationError(f"Parameter validation failed: {str(e)}") + raise BedrockValidationError(f"Parameter validation failed: {e!s}") - def _process_retrieval_result(self, result: Dict[str, Any]) -> Dict[str, Any]: + def _process_retrieval_result(self, result: dict[str, Any]) -> dict[str, Any]: """Process a single retrieval result from Bedrock Knowledge Base. 
Args: @@ -122,57 +135,57 @@ class BedrockKBRetrieverTool(BaseTool): Dict[str, Any]: Processed result with standardized format """ # Extract content - content_obj = result.get('content', {}) - content = content_obj.get('text', '') - content_type = content_obj.get('type', 'text') + content_obj = result.get("content", {}) + content = content_obj.get("text", "") + content_type = content_obj.get("type", "text") # Extract location information - location = result.get('location', {}) - location_type = location.get('type', 'unknown') + location = result.get("location", {}) + location_type = location.get("type", "unknown") source_uri = None # Map for location types and their URI fields location_mapping = { - 's3Location': {'field': 'uri', 'type': 'S3'}, - 'confluenceLocation': {'field': 'url', 'type': 'Confluence'}, - 'salesforceLocation': {'field': 'url', 'type': 'Salesforce'}, - 'sharePointLocation': {'field': 'url', 'type': 'SharePoint'}, - 'webLocation': {'field': 'url', 'type': 'Web'}, - 'customDocumentLocation': {'field': 'id', 'type': 'CustomDocument'}, - 'kendraDocumentLocation': {'field': 'uri', 'type': 'KendraDocument'}, - 'sqlLocation': {'field': 'query', 'type': 'SQL'} + "s3Location": {"field": "uri", "type": "S3"}, + "confluenceLocation": {"field": "url", "type": "Confluence"}, + "salesforceLocation": {"field": "url", "type": "Salesforce"}, + "sharePointLocation": {"field": "url", "type": "SharePoint"}, + "webLocation": {"field": "url", "type": "Web"}, + "customDocumentLocation": {"field": "id", "type": "CustomDocument"}, + "kendraDocumentLocation": {"field": "uri", "type": "KendraDocument"}, + "sqlLocation": {"field": "query", "type": "SQL"}, } # Extract the URI based on location type for loc_key, config in location_mapping.items(): if loc_key in location: - source_uri = location[loc_key].get(config['field']) - if not location_type or location_type == 'unknown': - location_type = config['type'] + source_uri = location[loc_key].get(config["field"]) + if not location_type or location_type == "unknown": + location_type = config["type"] break # Create result object result_object = { - 'content': content, - 'content_type': content_type, - 'source_type': location_type, - 'source_uri': source_uri + "content": content, + "content_type": content_type, + "source_type": location_type, + "source_uri": source_uri, } # Add optional fields if available - if 'score' in result: - result_object['score'] = result['score'] + if "score" in result: + result_object["score"] = result["score"] - if 'metadata' in result: - result_object['metadata'] = result['metadata'] + if "metadata" in result: + result_object["metadata"] = result["metadata"] # Handle byte content if present - if 'byteContent' in content_obj: - result_object['byte_content'] = content_obj['byteContent'] + if "byteContent" in content_obj: + result_object["byte_content"] = content_obj["byteContent"] # Handle row content if present - if 'row' in content_obj: - result_object['row_content'] = content_obj['row'] + if "row" in content_obj: + result_object["row_content"] = content_obj["row"] return result_object @@ -186,35 +199,35 @@ class BedrockKBRetrieverTool(BaseTool): try: # Initialize the Bedrock Agent Runtime client bedrock_agent_runtime = boto3.client( - 'bedrock-agent-runtime', - region_name=os.getenv('AWS_REGION', os.getenv('AWS_DEFAULT_REGION', 'us-east-1')), + "bedrock-agent-runtime", + region_name=os.getenv( + "AWS_REGION", os.getenv("AWS_DEFAULT_REGION", "us-east-1") + ), # AWS SDK will automatically use AWS_ACCESS_KEY_ID and 
AWS_SECRET_ACCESS_KEY from environment ) # Prepare the request parameters retrieve_params = { - 'knowledgeBaseId': self.knowledge_base_id, - 'retrievalQuery': { - 'text': query - } + "knowledgeBaseId": self.knowledge_base_id, + "retrievalQuery": {"text": query}, } # Add optional parameters if provided if self.retrieval_configuration: - retrieve_params['retrievalConfiguration'] = self.retrieval_configuration + retrieve_params["retrievalConfiguration"] = self.retrieval_configuration if self.guardrail_configuration: - retrieve_params['guardrailConfiguration'] = self.guardrail_configuration + retrieve_params["guardrailConfiguration"] = self.guardrail_configuration if self.next_token: - retrieve_params['nextToken'] = self.next_token + retrieve_params["nextToken"] = self.next_token # Make the retrieve API call response = bedrock_agent_runtime.retrieve(**retrieve_params) # Process the response results = [] - for result in response.get('retrievalResults', []): + for result in response.get("retrievalResults", []): processed_result = self._process_retrieval_result(result) results.append(processed_result) @@ -239,10 +252,10 @@ class BedrockKBRetrieverTool(BaseTool): error_message = str(e) # Try to extract error code if available - if hasattr(e, 'response') and 'Error' in e.response: - error_code = e.response['Error'].get('Code', 'Unknown') - error_message = e.response['Error'].get('Message', str(e)) + if hasattr(e, "response") and "Error" in e.response: + error_code = e.response["Error"].get("Code", "Unknown") + error_message = e.response["Error"].get("Message", str(e)) raise BedrockKnowledgeBaseError(f"Error ({error_code}): {error_message}") except Exception as e: - raise BedrockKnowledgeBaseError(f"Unexpected error: {str(e)}") \ No newline at end of file + raise BedrockKnowledgeBaseError(f"Unexpected error: {e!s}") diff --git a/packages/tools/src/crewai_tools/aws/s3/__init__.py b/packages/tools/src/crewai_tools/aws/s3/__init__.py index 4c858837c..4dfe44415 100644 --- a/packages/tools/src/crewai_tools/aws/s3/__init__.py +++ b/packages/tools/src/crewai_tools/aws/s3/__init__.py @@ -1,2 +1,2 @@ from .reader_tool import S3ReaderTool -from .writer_tool import S3WriterTool \ No newline at end of file +from .writer_tool import S3WriterTool diff --git a/packages/tools/src/crewai_tools/aws/s3/reader_tool.py b/packages/tools/src/crewai_tools/aws/s3/reader_tool.py index c3f1fa4eb..f5e047d6e 100644 --- a/packages/tools/src/crewai_tools/aws/s3/reader_tool.py +++ b/packages/tools/src/crewai_tools/aws/s3/reader_tool.py @@ -1,4 +1,3 @@ -from typing import Any, Type, List import os from crewai.tools import BaseTool @@ -8,14 +7,16 @@ from pydantic import BaseModel, Field class S3ReaderToolInput(BaseModel): """Input schema for S3ReaderTool.""" - file_path: str = Field(..., description="S3 file path (e.g., 's3://bucket-name/file-name')") + file_path: str = Field( + ..., description="S3 file path (e.g., 's3://bucket-name/file-name')" + ) class S3ReaderTool(BaseTool): name: str = "S3 Reader Tool" description: str = "Reads a file from Amazon S3 given an S3 file path" - args_schema: Type[BaseModel] = S3ReaderToolInput - package_dependencies: List[str] = ["boto3"] + args_schema: type[BaseModel] = S3ReaderToolInput + package_dependencies: list[str] = ["boto3"] def _run(self, file_path: str) -> str: try: @@ -28,19 +29,18 @@ class S3ReaderTool(BaseTool): bucket_name, object_key = self._parse_s3_path(file_path) s3 = boto3.client( - 's3', - region_name=os.getenv('CREW_AWS_REGION', 'us-east-1'), - 
aws_access_key_id=os.getenv('CREW_AWS_ACCESS_KEY_ID'), - aws_secret_access_key=os.getenv('CREW_AWS_SEC_ACCESS_KEY') + "s3", + region_name=os.getenv("CREW_AWS_REGION", "us-east-1"), + aws_access_key_id=os.getenv("CREW_AWS_ACCESS_KEY_ID"), + aws_secret_access_key=os.getenv("CREW_AWS_SEC_ACCESS_KEY"), ) # Read file content from S3 response = s3.get_object(Bucket=bucket_name, Key=object_key) - file_content = response['Body'].read().decode('utf-8') + return response["Body"].read().decode("utf-8") - return file_content except ClientError as e: - return f"Error reading file from S3: {str(e)}" + return f"Error reading file from S3: {e!s}" def _parse_s3_path(self, file_path: str) -> tuple: parts = file_path.replace("s3://", "").split("/", 1) diff --git a/packages/tools/src/crewai_tools/aws/s3/writer_tool.py b/packages/tools/src/crewai_tools/aws/s3/writer_tool.py index 2e1528d13..c2afb8287 100644 --- a/packages/tools/src/crewai_tools/aws/s3/writer_tool.py +++ b/packages/tools/src/crewai_tools/aws/s3/writer_tool.py @@ -1,20 +1,23 @@ -from typing import Type, List import os from crewai.tools import BaseTool from pydantic import BaseModel, Field + class S3WriterToolInput(BaseModel): """Input schema for S3WriterTool.""" - file_path: str = Field(..., description="S3 file path (e.g., 's3://bucket-name/file-name')") + + file_path: str = Field( + ..., description="S3 file path (e.g., 's3://bucket-name/file-name')" + ) content: str = Field(..., description="Content to write to the file") class S3WriterTool(BaseTool): name: str = "S3 Writer Tool" description: str = "Writes content to a file in Amazon S3 given an S3 file path" - args_schema: Type[BaseModel] = S3WriterToolInput - package_dependencies: List[str] = ["boto3"] + args_schema: type[BaseModel] = S3WriterToolInput + package_dependencies: list[str] = ["boto3"] def _run(self, file_path: str, content: str) -> str: try: @@ -27,16 +30,18 @@ class S3WriterTool(BaseTool): bucket_name, object_key = self._parse_s3_path(file_path) s3 = boto3.client( - 's3', - region_name=os.getenv('CREW_AWS_REGION', 'us-east-1'), - aws_access_key_id=os.getenv('CREW_AWS_ACCESS_KEY_ID'), - aws_secret_access_key=os.getenv('CREW_AWS_SEC_ACCESS_KEY') + "s3", + region_name=os.getenv("CREW_AWS_REGION", "us-east-1"), + aws_access_key_id=os.getenv("CREW_AWS_ACCESS_KEY_ID"), + aws_secret_access_key=os.getenv("CREW_AWS_SEC_ACCESS_KEY"), ) - s3.put_object(Bucket=bucket_name, Key=object_key, Body=content.encode('utf-8')) + s3.put_object( + Bucket=bucket_name, Key=object_key, Body=content.encode("utf-8") + ) return f"Successfully wrote content to {file_path}" except ClientError as e: - return f"Error writing file to S3: {str(e)}" + return f"Error writing file to S3: {e!s}" def _parse_s3_path(self, file_path: str) -> tuple: parts = file_path.replace("s3://", "").split("/", 1) diff --git a/packages/tools/src/crewai_tools/printer.py b/packages/tools/src/crewai_tools/printer.py index c67005ddd..70ee707d8 100644 --- a/packages/tools/src/crewai_tools/printer.py +++ b/packages/tools/src/crewai_tools/printer.py @@ -1,13 +1,11 @@ """Utility for colored console output.""" -from typing import Optional - class Printer: """Handles colored console output formatting.""" @staticmethod - def print(content: str, color: Optional[str] = None) -> None: + def print(content: str, color: str | None = None) -> None: """Prints content with optional color formatting. Args: @@ -29,7 +27,7 @@ class Printer: Args: content: The string to be printed in bold purple. 
""" - print("\033[1m\033[95m {}\033[00m".format(content)) + print(f"\033[1m\033[95m {content}\033[00m") @staticmethod def _print_bold_green(content: str) -> None: @@ -38,7 +36,7 @@ class Printer: Args: content: The string to be printed in bold green. """ - print("\033[1m\033[92m {}\033[00m".format(content)) + print(f"\033[1m\033[92m {content}\033[00m") @staticmethod def _print_purple(content: str) -> None: @@ -47,7 +45,7 @@ class Printer: Args: content: The string to be printed in purple. """ - print("\033[95m {}\033[00m".format(content)) + print(f"\033[95m {content}\033[00m") @staticmethod def _print_red(content: str) -> None: @@ -56,7 +54,7 @@ class Printer: Args: content: The string to be printed in red. """ - print("\033[91m {}\033[00m".format(content)) + print(f"\033[91m {content}\033[00m") @staticmethod def _print_bold_blue(content: str) -> None: @@ -65,7 +63,7 @@ class Printer: Args: content: The string to be printed in bold blue. """ - print("\033[1m\033[94m {}\033[00m".format(content)) + print(f"\033[1m\033[94m {content}\033[00m") @staticmethod def _print_yellow(content: str) -> None: @@ -74,7 +72,7 @@ class Printer: Args: content: The string to be printed in yellow. """ - print("\033[93m {}\033[00m".format(content)) + print(f"\033[93m {content}\033[00m") @staticmethod def _print_bold_yellow(content: str) -> None: @@ -83,7 +81,7 @@ class Printer: Args: content: The string to be printed in bold yellow. """ - print("\033[1m\033[93m {}\033[00m".format(content)) + print(f"\033[1m\033[93m {content}\033[00m") @staticmethod def _print_cyan(content: str) -> None: @@ -92,7 +90,7 @@ class Printer: Args: content: The string to be printed in cyan. """ - print("\033[96m {}\033[00m".format(content)) + print(f"\033[96m {content}\033[00m") @staticmethod def _print_bold_cyan(content: str) -> None: @@ -101,7 +99,7 @@ class Printer: Args: content: The string to be printed in bold cyan. """ - print("\033[1m\033[96m {}\033[00m".format(content)) + print(f"\033[1m\033[96m {content}\033[00m") @staticmethod def _print_magenta(content: str) -> None: @@ -110,7 +108,7 @@ class Printer: Args: content: The string to be printed in magenta. """ - print("\033[35m {}\033[00m".format(content)) + print(f"\033[35m {content}\033[00m") @staticmethod def _print_bold_magenta(content: str) -> None: @@ -119,7 +117,7 @@ class Printer: Args: content: The string to be printed in bold magenta. """ - print("\033[1m\033[35m {}\033[00m".format(content)) + print(f"\033[1m\033[35m {content}\033[00m") @staticmethod def _print_green(content: str) -> None: @@ -128,4 +126,4 @@ class Printer: Args: content: The string to be printed in green. 
""" - print("\033[32m {}\033[00m".format(content)) + print(f"\033[32m {content}\033[00m") diff --git a/packages/tools/src/crewai_tools/rag/__init__.py b/packages/tools/src/crewai_tools/rag/__init__.py index 8d08b2907..0ff0fe125 100644 --- a/packages/tools/src/crewai_tools/rag/__init__.py +++ b/packages/tools/src/crewai_tools/rag/__init__.py @@ -3,6 +3,6 @@ from crewai_tools.rag.data_types import DataType __all__ = [ "RAG", - "EmbeddingService", "DataType", + "EmbeddingService", ] diff --git a/packages/tools/src/crewai_tools/rag/base_loader.py b/packages/tools/src/crewai_tools/rag/base_loader.py index e38d6f8c1..4aec0e53d 100644 --- a/packages/tools/src/crewai_tools/rag/base_loader.py +++ b/packages/tools/src/crewai_tools/rag/base_loader.py @@ -1,5 +1,6 @@ from abc import ABC, abstractmethod -from typing import Any, Dict, Optional +from typing import Any + from pydantic import BaseModel, Field from crewai_tools.rag.misc import compute_sha256 @@ -9,19 +10,22 @@ from crewai_tools.rag.source_content import SourceContent class LoaderResult(BaseModel): content: str = Field(description="The text content of the source") source: str = Field(description="The source of the content", default="unknown") - metadata: Dict[str, Any] = Field(description="The metadata of the source", default_factory=dict) + metadata: dict[str, Any] = Field( + description="The metadata of the source", default_factory=dict + ) doc_id: str = Field(description="The id of the document") class BaseLoader(ABC): - def __init__(self, config: Optional[Dict[str, Any]] = None): + def __init__(self, config: dict[str, Any] | None = None): self.config = config or {} @abstractmethod - def load(self, content: SourceContent, **kwargs) -> LoaderResult: - ... + def load(self, content: SourceContent, **kwargs) -> LoaderResult: ... - def generate_doc_id(self, source_ref: str | None = None, content: str | None = None) -> str: + def generate_doc_id( + self, source_ref: str | None = None, content: str | None = None + ) -> str: """ Generate a unique document id based on the source reference and content. If the source reference is not provided, the content is used as the source reference. 
diff --git a/packages/tools/src/crewai_tools/rag/chunkers/__init__.py b/packages/tools/src/crewai_tools/rag/chunkers/__init__.py index f48483391..8a08052fa 100644 --- a/packages/tools/src/crewai_tools/rag/chunkers/__init__.py +++ b/packages/tools/src/crewai_tools/rag/chunkers/__init__.py @@ -1,15 +1,19 @@ from crewai_tools.rag.chunkers.base_chunker import BaseChunker from crewai_tools.rag.chunkers.default_chunker import DefaultChunker -from crewai_tools.rag.chunkers.text_chunker import TextChunker, DocxChunker, MdxChunker -from crewai_tools.rag.chunkers.structured_chunker import CsvChunker, JsonChunker, XmlChunker +from crewai_tools.rag.chunkers.structured_chunker import ( + CsvChunker, + JsonChunker, + XmlChunker, +) +from crewai_tools.rag.chunkers.text_chunker import DocxChunker, MdxChunker, TextChunker __all__ = [ "BaseChunker", - "DefaultChunker", - "TextChunker", - "DocxChunker", - "MdxChunker", "CsvChunker", + "DefaultChunker", + "DocxChunker", "JsonChunker", + "MdxChunker", + "TextChunker", "XmlChunker", ] diff --git a/packages/tools/src/crewai_tools/rag/chunkers/base_chunker.py b/packages/tools/src/crewai_tools/rag/chunkers/base_chunker.py index deafbfc7a..592807a57 100644 --- a/packages/tools/src/crewai_tools/rag/chunkers/base_chunker.py +++ b/packages/tools/src/crewai_tools/rag/chunkers/base_chunker.py @@ -1,6 +1,6 @@ -from typing import List, Optional import re + class RecursiveCharacterTextSplitter: """ A text splitter that recursively splits text based on a hierarchy of separators. @@ -10,7 +10,7 @@ class RecursiveCharacterTextSplitter: self, chunk_size: int = 4000, chunk_overlap: int = 200, - separators: Optional[List[str]] = None, + separators: list[str] | None = None, keep_separator: bool = True, ): """ @@ -23,7 +23,9 @@ class RecursiveCharacterTextSplitter: keep_separator: Whether to keep the separator in the split text """ if chunk_overlap >= chunk_size: - raise ValueError(f"Chunk overlap ({chunk_overlap}) cannot be >= chunk size ({chunk_size})") + raise ValueError( + f"Chunk overlap ({chunk_overlap}) cannot be >= chunk size ({chunk_size})" + ) self._chunk_size = chunk_size self._chunk_overlap = chunk_overlap @@ -36,10 +38,10 @@ class RecursiveCharacterTextSplitter: "", ] - def split_text(self, text: str) -> List[str]: + def split_text(self, text: str) -> list[str]: return self._split_text(text, self._separators) - def _split_text(self, text: str, separators: List[str]) -> List[str]: + def _split_text(self, text: str, separators: list[str]) -> list[str]: separator = separators[-1] new_separators = [] @@ -49,7 +51,7 @@ class RecursiveCharacterTextSplitter: break if re.search(re.escape(sep), text): separator = sep - new_separators = separators[i + 1:] + new_separators = separators[i + 1 :] break splits = self._split_text_with_separator(text, separator) @@ -68,7 +70,7 @@ class RecursiveCharacterTextSplitter: return self._merge_splits(good_splits, separator) - def _split_text_with_separator(self, text: str, separator: str) -> List[str]: + def _split_text_with_separator(self, text: str, separator: str) -> list[str]: if separator == "": return list(text) @@ -90,16 +92,15 @@ class RecursiveCharacterTextSplitter: splits[-1] += separator return [s for s in splits if s] - else: - return text.split(separator) + return text.split(separator) - def _split_by_characters(self, text: str) -> List[str]: + def _split_by_characters(self, text: str) -> list[str]: chunks = [] for i in range(0, len(text), self._chunk_size): - chunks.append(text[i:i + self._chunk_size]) + chunks.append(text[i : 
i + self._chunk_size]) return chunks - def _merge_splits(self, splits: List[str], separator: str) -> List[str]: + def _merge_splits(self, splits: list[str], separator: str) -> list[str]: """Merge splits into chunks with proper overlap.""" docs = [] current_doc = [] @@ -112,7 +113,10 @@ class RecursiveCharacterTextSplitter: if separator == "": doc = "".join(current_doc) else: - doc = separator.join(current_doc) + if self._keep_separator and separator == " ": + doc = "".join(current_doc) + else: + doc = separator.join(current_doc) if doc: docs.append(doc) @@ -133,15 +137,25 @@ class RecursiveCharacterTextSplitter: if separator == "": doc = "".join(current_doc) else: - doc = separator.join(current_doc) + if self._keep_separator and separator == " ": + doc = "".join(current_doc) + else: + doc = separator.join(current_doc) if doc: docs.append(doc) return docs + class BaseChunker: - def __init__(self, chunk_size: int = 1000, chunk_overlap: int = 200, separators: Optional[List[str]] = None, keep_separator: bool = True): + def __init__( + self, + chunk_size: int = 1000, + chunk_overlap: int = 200, + separators: list[str] | None = None, + keep_separator: bool = True, + ): """ Initialize the Chunker @@ -159,8 +173,7 @@ class BaseChunker: keep_separator=keep_separator, ) - - def chunk(self, text: str) -> List[str]: + def chunk(self, text: str) -> list[str]: if not text or not text.strip(): return [] diff --git a/packages/tools/src/crewai_tools/rag/chunkers/default_chunker.py b/packages/tools/src/crewai_tools/rag/chunkers/default_chunker.py index 0d0ec6935..7073161b2 100644 --- a/packages/tools/src/crewai_tools/rag/chunkers/default_chunker.py +++ b/packages/tools/src/crewai_tools/rag/chunkers/default_chunker.py @@ -1,6 +1,12 @@ from crewai_tools.rag.chunkers.base_chunker import BaseChunker -from typing import List, Optional + class DefaultChunker(BaseChunker): - def __init__(self, chunk_size: int = 2000, chunk_overlap: int = 20, separators: Optional[List[str]] = None, keep_separator: bool = True): + def __init__( + self, + chunk_size: int = 2000, + chunk_overlap: int = 20, + separators: list[str] | None = None, + keep_separator: bool = True, + ): super().__init__(chunk_size, chunk_overlap, separators, keep_separator) diff --git a/packages/tools/src/crewai_tools/rag/chunkers/structured_chunker.py b/packages/tools/src/crewai_tools/rag/chunkers/structured_chunker.py index 483f92588..4fb4a36df 100644 --- a/packages/tools/src/crewai_tools/rag/chunkers/structured_chunker.py +++ b/packages/tools/src/crewai_tools/rag/chunkers/structured_chunker.py @@ -1,49 +1,66 @@ from crewai_tools.rag.chunkers.base_chunker import BaseChunker -from typing import List, Optional class CsvChunker(BaseChunker): - def __init__(self, chunk_size: int = 1200, chunk_overlap: int = 100, separators: Optional[List[str]] = None, keep_separator: bool = True): + def __init__( + self, + chunk_size: int = 1200, + chunk_overlap: int = 100, + separators: list[str] | None = None, + keep_separator: bool = True, + ): if separators is None: separators = [ - "\nRow ", # Row boundaries (from CSVLoader format) - "\n", # Line breaks - " | ", # Column separators - ", ", # Comma separators - " ", # Word breaks - "", # Character level + "\nRow ", # Row boundaries (from CSVLoader format) + "\n", # Line breaks + " | ", # Column separators + ", ", # Comma separators + " ", # Word breaks + "", # Character level ] super().__init__(chunk_size, chunk_overlap, separators, keep_separator) class JsonChunker(BaseChunker): - def __init__(self, chunk_size: int = 
2000, chunk_overlap: int = 200, separators: Optional[List[str]] = None, keep_separator: bool = True): + def __init__( + self, + chunk_size: int = 2000, + chunk_overlap: int = 200, + separators: list[str] | None = None, + keep_separator: bool = True, + ): if separators is None: separators = [ - "\n\n", # Object/array boundaries - "\n", # Line breaks - "},", # Object endings - "],", # Array endings - ", ", # Property separators - ": ", # Key-value separators - " ", # Word breaks - "", # Character level + "\n\n", # Object/array boundaries + "\n", # Line breaks + "},", # Object endings + "],", # Array endings + ", ", # Property separators + ": ", # Key-value separators + " ", # Word breaks + "", # Character level ] super().__init__(chunk_size, chunk_overlap, separators, keep_separator) class XmlChunker(BaseChunker): - def __init__(self, chunk_size: int = 2500, chunk_overlap: int = 250, separators: Optional[List[str]] = None, keep_separator: bool = True): + def __init__( + self, + chunk_size: int = 2500, + chunk_overlap: int = 250, + separators: list[str] | None = None, + keep_separator: bool = True, + ): if separators is None: separators = [ - "\n\n", # Element boundaries - "\n", # Line breaks - ">", # Tag endings - ". ", # Sentence endings (for text content) - "! ", # Exclamation endings - "? ", # Question endings - ", ", # Comma separators - " ", # Word breaks - "", # Character level + "\n\n", # Element boundaries + "\n", # Line breaks + ">", # Tag endings + ". ", # Sentence endings (for text content) + "! ", # Exclamation endings + "? ", # Question endings + ", ", # Comma separators + " ", # Word breaks + "", # Character level ] super().__init__(chunk_size, chunk_overlap, separators, keep_separator) diff --git a/packages/tools/src/crewai_tools/rag/chunkers/text_chunker.py b/packages/tools/src/crewai_tools/rag/chunkers/text_chunker.py index 2e76df8ab..7b9aae5b0 100644 --- a/packages/tools/src/crewai_tools/rag/chunkers/text_chunker.py +++ b/packages/tools/src/crewai_tools/rag/chunkers/text_chunker.py @@ -1,59 +1,76 @@ from crewai_tools.rag.chunkers.base_chunker import BaseChunker -from typing import List, Optional class TextChunker(BaseChunker): - def __init__(self, chunk_size: int = 1500, chunk_overlap: int = 150, separators: Optional[List[str]] = None, keep_separator: bool = True): + def __init__( + self, + chunk_size: int = 1500, + chunk_overlap: int = 150, + separators: list[str] | None = None, + keep_separator: bool = True, + ): if separators is None: separators = [ "\n\n\n", # Multiple line breaks (sections) - "\n\n", # Paragraph breaks - "\n", # Line breaks - ". ", # Sentence endings - "! ", # Exclamation endings - "? ", # Question endings - "; ", # Semicolon breaks - ", ", # Comma breaks - " ", # Word breaks - "", # Character level + "\n\n", # Paragraph breaks + "\n", # Line breaks + ". ", # Sentence endings + "! ", # Exclamation endings + "? 
", # Question endings + "; ", # Semicolon breaks + ", ", # Comma breaks + " ", # Word breaks + "", # Character level ] super().__init__(chunk_size, chunk_overlap, separators, keep_separator) class DocxChunker(BaseChunker): - def __init__(self, chunk_size: int = 2500, chunk_overlap: int = 250, separators: Optional[List[str]] = None, keep_separator: bool = True): + def __init__( + self, + chunk_size: int = 2500, + chunk_overlap: int = 250, + separators: list[str] | None = None, + keep_separator: bool = True, + ): if separators is None: separators = [ "\n\n\n", # Multiple line breaks (major sections) - "\n\n", # Paragraph breaks - "\n", # Line breaks - ". ", # Sentence endings - "! ", # Exclamation endings - "? ", # Question endings - "; ", # Semicolon breaks - ", ", # Comma breaks - " ", # Word breaks - "", # Character level + "\n\n", # Paragraph breaks + "\n", # Line breaks + ". ", # Sentence endings + "! ", # Exclamation endings + "? ", # Question endings + "; ", # Semicolon breaks + ", ", # Comma breaks + " ", # Word breaks + "", # Character level ] super().__init__(chunk_size, chunk_overlap, separators, keep_separator) class MdxChunker(BaseChunker): - def __init__(self, chunk_size: int = 3000, chunk_overlap: int = 300, separators: Optional[List[str]] = None, keep_separator: bool = True): + def __init__( + self, + chunk_size: int = 3000, + chunk_overlap: int = 300, + separators: list[str] | None = None, + keep_separator: bool = True, + ): if separators is None: separators = [ - "\n## ", # H2 headers (major sections) + "\n## ", # H2 headers (major sections) "\n### ", # H3 headers (subsections) - "\n#### ", # H4 headers (sub-subsections) - "\n\n", # Paragraph breaks - "\n```", # Code block boundaries - "\n", # Line breaks - ". ", # Sentence endings - "! ", # Exclamation endings - "? ", # Question endings - "; ", # Semicolon breaks - ", ", # Comma breaks - " ", # Word breaks - "", # Character level + "\n#### ", # H4 headers (sub-subsections) + "\n\n", # Paragraph breaks + "\n```", # Code block boundaries + "\n", # Line breaks + ". ", # Sentence endings + "! ", # Exclamation endings + "? ", # Question endings + "; ", # Semicolon breaks + ", ", # Comma breaks + " ", # Word breaks + "", # Character level ] super().__init__(chunk_size, chunk_overlap, separators, keep_separator) diff --git a/packages/tools/src/crewai_tools/rag/chunkers/web_chunker.py b/packages/tools/src/crewai_tools/rag/chunkers/web_chunker.py index 2712a6c69..cc1a514d3 100644 --- a/packages/tools/src/crewai_tools/rag/chunkers/web_chunker.py +++ b/packages/tools/src/crewai_tools/rag/chunkers/web_chunker.py @@ -1,20 +1,25 @@ from crewai_tools.rag.chunkers.base_chunker import BaseChunker -from typing import List, Optional class WebsiteChunker(BaseChunker): - def __init__(self, chunk_size: int = 2500, chunk_overlap: int = 250, separators: Optional[List[str]] = None, keep_separator: bool = True): + def __init__( + self, + chunk_size: int = 2500, + chunk_overlap: int = 250, + separators: list[str] | None = None, + keep_separator: bool = True, + ): if separators is None: separators = [ "\n\n\n", # Major section breaks - "\n\n", # Paragraph breaks - "\n", # Line breaks - ". ", # Sentence endings - "! ", # Exclamation endings - "? ", # Question endings - "; ", # Semicolon breaks - ", ", # Comma breaks - " ", # Word breaks - "", # Character level + "\n\n", # Paragraph breaks + "\n", # Line breaks + ". ", # Sentence endings + "! ", # Exclamation endings + "? 
", # Question endings + "; ", # Semicolon breaks + ", ", # Comma breaks + " ", # Word breaks + "", # Character level ] super().__init__(chunk_size, chunk_overlap, separators, keep_separator) diff --git a/packages/tools/src/crewai_tools/rag/core.py b/packages/tools/src/crewai_tools/rag/core.py index 0aa4b666c..b7834ccc5 100644 --- a/packages/tools/src/crewai_tools/rag/core.py +++ b/packages/tools/src/crewai_tools/rag/core.py @@ -1,18 +1,18 @@ import logging from pathlib import Path -from typing import Any, Dict, List, Optional, Union +from typing import Any from uuid import uuid4 import chromadb import litellm from pydantic import BaseModel, Field, PrivateAttr -from crewai_tools.tools.rag.rag_tool import Adapter -from crewai_tools.rag.data_types import DataType from crewai_tools.rag.base_loader import BaseLoader from crewai_tools.rag.chunkers.base_chunker import BaseChunker -from crewai_tools.rag.source_content import SourceContent +from crewai_tools.rag.data_types import DataType from crewai_tools.rag.misc import compute_sha256 +from crewai_tools.rag.source_content import SourceContent +from crewai_tools.tools.rag.rag_tool import Adapter logger = logging.getLogger(__name__) @@ -22,29 +22,21 @@ class EmbeddingService: self.model = model self.kwargs = kwargs - def embed_text(self, text: str) -> List[float]: + def embed_text(self, text: str) -> list[float]: try: - response = litellm.embedding( - model=self.model, - input=[text], - **self.kwargs - ) - return response.data[0]['embedding'] + response = litellm.embedding(model=self.model, input=[text], **self.kwargs) + return response.data[0]["embedding"] except Exception as e: logger.error(f"Error generating embedding: {e}") raise - def embed_batch(self, texts: List[str]) -> List[List[float]]: + def embed_batch(self, texts: list[str]) -> list[list[float]]: if not texts: return [] try: - response = litellm.embedding( - model=self.model, - input=texts, - **self.kwargs - ) - return [data['embedding'] for data in response.data] + response = litellm.embedding(model=self.model, input=texts, **self.kwargs) + return [data["embedding"] for data in response.data] except Exception as e: logger.error(f"Error generating batch embeddings: {e}") raise @@ -53,18 +45,18 @@ class EmbeddingService: class Document(BaseModel): id: str = Field(default_factory=lambda: str(uuid4())) content: str - metadata: Dict[str, Any] = Field(default_factory=dict) + metadata: dict[str, Any] = Field(default_factory=dict) data_type: DataType = DataType.TEXT - source: Optional[str] = None + source: str | None = None class RAG(Adapter): collection_name: str = "crewai_knowledge_base" - persist_directory: Optional[str] = None + persist_directory: str | None = None embedding_model: str = "text-embedding-3-large" summarize: bool = False top_k: int = 5 - embedding_config: Dict[str, Any] = Field(default_factory=dict) + embedding_config: dict[str, Any] = Field(default_factory=dict) _client: Any = PrivateAttr() _collection: Any = PrivateAttr() @@ -79,10 +71,15 @@ class RAG(Adapter): self._collection = self._client.get_or_create_collection( name=self.collection_name, - metadata={"hnsw:space": "cosine", "description": "CrewAI Knowledge Base"} + metadata={ + "hnsw:space": "cosine", + "description": "CrewAI Knowledge Base", + }, ) - self._embedding_service = EmbeddingService(model=self.embedding_model, **self.embedding_config) + self._embedding_service = EmbeddingService( + model=self.embedding_model, **self.embedding_config + ) except Exception as e: logger.error(f"Failed to initialize ChromaDB: 
{e}") raise @@ -92,11 +89,11 @@ class RAG(Adapter): def add( self, content: str | Path, - data_type: Optional[Union[str, DataType]] = None, - metadata: Optional[Dict[str, Any]] = None, - loader: Optional[BaseLoader] = None, - chunker: Optional[BaseChunker] = None, - **kwargs: Any + data_type: str | DataType | None = None, + metadata: dict[str, Any] | None = None, + loader: BaseLoader | None = None, + chunker: BaseChunker | None = None, + **kwargs: Any, ) -> None: source_content = SourceContent(content) @@ -111,11 +108,19 @@ class RAG(Adapter): loader_result = loader.load(source_content) doc_id = loader_result.doc_id - existing_doc = self._collection.get(where={"source": source_content.source_ref}, limit=1) - existing_doc_id = existing_doc and existing_doc['metadatas'][0]['doc_id'] if existing_doc['metadatas'] else None + existing_doc = self._collection.get( + where={"source": source_content.source_ref}, limit=1 + ) + existing_doc_id = ( + existing_doc and existing_doc["metadatas"][0]["doc_id"] + if existing_doc["metadatas"] + else None + ) if existing_doc_id == doc_id: - logger.warning(f"Document with source {loader_result.source} already exists") + logger.warning( + f"Document with source {loader_result.source} already exists" + ) return # Document with same source ref does exists but the content has changed, deleting the oldest reference @@ -128,14 +133,16 @@ class RAG(Adapter): chunks = chunker.chunk(loader_result.content) for i, chunk in enumerate(chunks): doc_metadata = (metadata or {}).copy() - doc_metadata['chunk_index'] = i - documents.append(Document( - id=compute_sha256(chunk), - content=chunk, - metadata=doc_metadata, - data_type=data_type, - source=loader_result.source - )) + doc_metadata["chunk_index"] = i + documents.append( + Document( + id=compute_sha256(chunk), + content=chunk, + metadata=doc_metadata, + data_type=data_type, + source=loader_result.source, + ) + ) if not documents: logger.warning("No documents to add") @@ -153,11 +160,13 @@ class RAG(Adapter): for doc in documents: doc_metadata = doc.metadata.copy() - doc_metadata.update({ - "data_type": doc.data_type.value, - "source": doc.source, - "doc_id": doc_id - }) + doc_metadata.update( + { + "data_type": doc.data_type.value, + "source": doc.source, + "doc_id": doc_id, + } + ) metadatas.append(doc_metadata) try: @@ -171,7 +180,7 @@ class RAG(Adapter): except Exception as e: logger.error(f"Failed to add documents to ChromaDB: {e}") - def query(self, question: str, where: Optional[Dict[str, Any]] = None) -> str: + def query(self, question: str, where: dict[str, Any] | None = None) -> str: try: question_embedding = self._embedding_service.embed_text(question) @@ -179,10 +188,14 @@ class RAG(Adapter): query_embeddings=[question_embedding], n_results=self.top_k, where=where, - include=["documents", "metadatas", "distances"] + include=["documents", "metadatas", "distances"], ) - if not results or not results.get("documents") or not results["documents"][0]: + if ( + not results + or not results.get("documents") + or not results["documents"][0] + ): return "No relevant content found." 
documents = results["documents"][0] @@ -195,8 +208,12 @@ class RAG(Adapter): metadata = metadatas[i] if i < len(metadatas) else {} distance = distances[i] if i < len(distances) else 1.0 source = metadata.get("source", "unknown") if metadata else "unknown" - score = 1 - distance if distance is not None else 0 # Convert distance to similarity - formatted_results.append(f"[Source: {source}, Relevance: {score:.3f}]\n{doc}") + score = ( + 1 - distance if distance is not None else 0 + ) # Convert distance to similarity + formatted_results.append( + f"[Source: {source}, Relevance: {score:.3f}]\n{doc}" + ) return "\n\n".join(formatted_results) except Exception as e: @@ -210,23 +227,25 @@ class RAG(Adapter): except Exception as e: logger.error(f"Failed to delete collection: {e}") - def get_collection_info(self) -> Dict[str, Any]: + def get_collection_info(self) -> dict[str, Any]: try: count = self._collection.count() return { "name": self.collection_name, "count": count, - "embedding_model": self.embedding_model + "embedding_model": self.embedding_model, } except Exception as e: logger.error(f"Failed to get collection info: {e}") return {"error": str(e)} - def _get_data_type(self, content: SourceContent, data_type: str | DataType | None = None) -> DataType: + def _get_data_type( + self, content: SourceContent, data_type: str | DataType | None = None + ) -> DataType: try: if isinstance(data_type, str): return DataType(data_type) - except Exception as e: + except Exception: pass return content.data_type diff --git a/packages/tools/src/crewai_tools/rag/data_types.py b/packages/tools/src/crewai_tools/rag/data_types.py index d2d265cce..49c0e5d52 100644 --- a/packages/tools/src/crewai_tools/rag/data_types.py +++ b/packages/tools/src/crewai_tools/rag/data_types.py @@ -1,9 +1,11 @@ +import os from enum import Enum from pathlib import Path from urllib.parse import urlparse -import os -from crewai_tools.rag.chunkers.base_chunker import BaseChunker + from crewai_tools.rag.base_loader import BaseLoader +from crewai_tools.rag.chunkers.base_chunker import BaseChunker + class DataType(str, Enum): PDF_FILE = "pdf_file" @@ -25,29 +27,38 @@ class DataType(str, Enum): # Web types WEBSITE = "website" DOCS_SITE = "docs_site" + YOUTUBE_VIDEO = "youtube_video" + YOUTUBE_CHANNEL = "youtube_channel" # Raw types TEXT = "text" - def get_chunker(self) -> BaseChunker: from importlib import import_module chunkers = { + DataType.PDF_FILE: ("text_chunker", "TextChunker"), DataType.TEXT_FILE: ("text_chunker", "TextChunker"), DataType.TEXT: ("text_chunker", "TextChunker"), DataType.DOCX: ("text_chunker", "DocxChunker"), DataType.MDX: ("text_chunker", "MdxChunker"), - # Structured formats DataType.CSV: ("structured_chunker", "CsvChunker"), DataType.JSON: ("structured_chunker", "JsonChunker"), DataType.XML: ("structured_chunker", "XmlChunker"), - DataType.WEBSITE: ("web_chunker", "WebsiteChunker"), + DataType.DIRECTORY: ("text_chunker", "TextChunker"), + DataType.YOUTUBE_VIDEO: ("text_chunker", "TextChunker"), + DataType.YOUTUBE_CHANNEL: ("text_chunker", "TextChunker"), + DataType.GITHUB: ("text_chunker", "TextChunker"), + DataType.DOCS_SITE: ("text_chunker", "TextChunker"), + DataType.MYSQL: ("text_chunker", "TextChunker"), + DataType.POSTGRES: ("text_chunker", "TextChunker"), } - module_name, class_name = chunkers.get(self, ("default_chunker", "DefaultChunker")) + if self not in chunkers: + raise ValueError(f"No chunker defined for {self}") + module_name, class_name = chunkers[self] module_path = 
f"crewai_tools.rag.chunkers.{module_name}" try: @@ -60,6 +71,7 @@ class DataType(str, Enum): from importlib import import_module loaders = { + DataType.PDF_FILE: ("pdf_loader", "PDFLoader"), DataType.TEXT_FILE: ("text_loader", "TextFileLoader"), DataType.TEXT: ("text_loader", "TextLoader"), DataType.XML: ("xml_loader", "XMLLoader"), @@ -69,9 +81,20 @@ class DataType(str, Enum): DataType.DOCX: ("docx_loader", "DOCXLoader"), DataType.CSV: ("csv_loader", "CSVLoader"), DataType.DIRECTORY: ("directory_loader", "DirectoryLoader"), + DataType.YOUTUBE_VIDEO: ("youtube_video_loader", "YoutubeVideoLoader"), + DataType.YOUTUBE_CHANNEL: ( + "youtube_channel_loader", + "YoutubeChannelLoader", + ), + DataType.GITHUB: ("github_loader", "GithubLoader"), + DataType.DOCS_SITE: ("docs_site_loader", "DocsSiteLoader"), + DataType.MYSQL: ("mysql_loader", "MySQLLoader"), + DataType.POSTGRES: ("postgres_loader", "PostgresLoader"), } - module_name, class_name = loaders.get(self, ("text_loader", "TextLoader")) + if self not in loaders: + raise ValueError(f"No loader defined for {self}") + module_name, class_name = loaders[self] module_path = f"crewai_tools.rag.loaders.{module_name}" try: module = import_module(module_path) @@ -79,6 +102,7 @@ class DataType(str, Enum): except Exception as e: raise ValueError(f"Error loading loader for {self}: {e}") + class DataTypes: @staticmethod def from_content(content: str | Path | None = None) -> DataType: diff --git a/packages/tools/src/crewai_tools/rag/loaders/__init__.py b/packages/tools/src/crewai_tools/rag/loaders/__init__.py index 503651468..ec19cc26f 100644 --- a/packages/tools/src/crewai_tools/rag/loaders/__init__.py +++ b/packages/tools/src/crewai_tools/rag/loaders/__init__.py @@ -1,20 +1,26 @@ -from crewai_tools.rag.loaders.text_loader import TextFileLoader, TextLoader -from crewai_tools.rag.loaders.xml_loader import XMLLoader -from crewai_tools.rag.loaders.webpage_loader import WebPageLoader -from crewai_tools.rag.loaders.mdx_loader import MDXLoader -from crewai_tools.rag.loaders.json_loader import JSONLoader -from crewai_tools.rag.loaders.docx_loader import DOCXLoader from crewai_tools.rag.loaders.csv_loader import CSVLoader from crewai_tools.rag.loaders.directory_loader import DirectoryLoader +from crewai_tools.rag.loaders.docx_loader import DOCXLoader +from crewai_tools.rag.loaders.json_loader import JSONLoader +from crewai_tools.rag.loaders.mdx_loader import MDXLoader +from crewai_tools.rag.loaders.pdf_loader import PDFLoader +from crewai_tools.rag.loaders.text_loader import TextFileLoader, TextLoader +from crewai_tools.rag.loaders.webpage_loader import WebPageLoader +from crewai_tools.rag.loaders.xml_loader import XMLLoader +from crewai_tools.rag.loaders.youtube_channel_loader import YoutubeChannelLoader +from crewai_tools.rag.loaders.youtube_video_loader import YoutubeVideoLoader __all__ = [ + "CSVLoader", + "DOCXLoader", + "DirectoryLoader", + "JSONLoader", + "MDXLoader", + "PDFLoader", "TextFileLoader", "TextLoader", - "XMLLoader", "WebPageLoader", - "MDXLoader", - "JSONLoader", - "DOCXLoader", - "CSVLoader", - "DirectoryLoader", + "XMLLoader", + "YoutubeChannelLoader", + "YoutubeVideoLoader", ] diff --git a/packages/tools/src/crewai_tools/rag/loaders/csv_loader.py b/packages/tools/src/crewai_tools/rag/loaders/csv_loader.py index e389123a7..fd74cbf19 100644 --- a/packages/tools/src/crewai_tools/rag/loaders/csv_loader.py +++ b/packages/tools/src/crewai_tools/rag/loaders/csv_loader.py @@ -17,21 +17,23 @@ class CSVLoader(BaseLoader): return 
self._parse_csv(content_str, source_ref) - def _load_from_url(self, url: str, kwargs: dict) -> str: import requests - headers = kwargs.get("headers", { - "Accept": "text/csv, application/csv, text/plain", - "User-Agent": "Mozilla/5.0 (compatible; crewai-tools CSVLoader)" - }) + headers = kwargs.get( + "headers", + { + "Accept": "text/csv, application/csv, text/plain", + "User-Agent": "Mozilla/5.0 (compatible; crewai-tools CSVLoader)", + }, + ) try: response = requests.get(url, headers=headers, timeout=30) response.raise_for_status() return response.text except Exception as e: - raise ValueError(f"Error fetching CSV from URL {url}: {str(e)}") + raise ValueError(f"Error fetching CSV from URL {url}: {e!s}") def _load_from_file(self, path: str) -> str: with open(path, "r", encoding="utf-8") as file: @@ -57,7 +59,7 @@ class CSVLoader(BaseLoader): metadata = { "format": "csv", "columns": headers, - "rows": len(text_parts) - 2 if headers else 0 + "rows": len(text_parts) - 2 if headers else 0, } except Exception as e: @@ -68,5 +70,5 @@ class CSVLoader(BaseLoader): content=text, source=source_ref, metadata=metadata, - doc_id=self.generate_doc_id(source_ref=source_ref, content=text) + doc_id=self.generate_doc_id(source_ref=source_ref, content=text), ) diff --git a/packages/tools/src/crewai_tools/rag/loaders/directory_loader.py b/packages/tools/src/crewai_tools/rag/loaders/directory_loader.py index 7bc5f298b..776fa3297 100644 --- a/packages/tools/src/crewai_tools/rag/loaders/directory_loader.py +++ b/packages/tools/src/crewai_tools/rag/loaders/directory_loader.py @@ -1,6 +1,5 @@ import os from pathlib import Path -from typing import List from crewai_tools.rag.base_loader import BaseLoader, LoaderResult from crewai_tools.rag.source_content import SourceContent @@ -22,7 +21,9 @@ class DirectoryLoader(BaseLoader): source_ref = source_content.source_ref if source_content.is_url(): - raise ValueError("URL directory loading is not supported. Please provide a local directory path.") + raise ValueError( + "URL directory loading is not supported. Please provide a local directory path." 
+ ) if not os.path.exists(source_ref): raise FileNotFoundError(f"Directory does not exist: {source_ref}") @@ -38,7 +39,9 @@ class DirectoryLoader(BaseLoader): exclude_extensions = kwargs.get("exclude_extensions", None) max_files = kwargs.get("max_files", None) - files = self._find_files(dir_path, recursive, include_extensions, exclude_extensions) + files = self._find_files( + dir_path, recursive, include_extensions, exclude_extensions + ) if max_files and len(files) > max_files: files = files[:max_files] @@ -52,13 +55,15 @@ class DirectoryLoader(BaseLoader): result = self._process_single_file(file_path) if result: all_contents.append(f"=== File: {file_path} ===\n{result.content}") - processed_files.append({ - "path": file_path, - "metadata": result.metadata, - "source": result.source - }) + processed_files.append( + { + "path": file_path, + "metadata": result.metadata, + "source": result.source, + } + ) except Exception as e: - error_msg = f"Error processing {file_path}: {str(e)}" + error_msg = f"Error processing {file_path}: {e!s}" errors.append(error_msg) all_contents.append(f"=== File: {file_path} (ERROR) ===\n{error_msg}") @@ -71,23 +76,29 @@ class DirectoryLoader(BaseLoader): "processed_files": len(processed_files), "errors": len(errors), "file_details": processed_files, - "error_details": errors + "error_details": errors, } return LoaderResult( content=combined_content, source=dir_path, metadata=metadata, - doc_id=self.generate_doc_id(source_ref=dir_path, content=combined_content) + doc_id=self.generate_doc_id(source_ref=dir_path, content=combined_content), ) - def _find_files(self, dir_path: str, recursive: bool, include_ext: List[str] | None = None, exclude_ext: List[str] | None = None) -> List[str]: + def _find_files( + self, + dir_path: str, + recursive: bool, + include_ext: list[str] | None = None, + exclude_ext: list[str] | None = None, + ) -> list[str]: """Find all files in directory matching criteria.""" files = [] if recursive: for root, dirs, filenames in os.walk(dir_path): - dirs[:] = [d for d in dirs if not d.startswith('.')] + dirs[:] = [d for d in dirs if not d.startswith(".")] for filename in filenames: if self._should_include_file(filename, include_ext, exclude_ext): @@ -96,26 +107,37 @@ class DirectoryLoader(BaseLoader): try: for item in os.listdir(dir_path): item_path = os.path.join(dir_path, item) - if os.path.isfile(item_path) and self._should_include_file(item, include_ext, exclude_ext): + if os.path.isfile(item_path) and self._should_include_file( + item, include_ext, exclude_ext + ): files.append(item_path) except PermissionError: pass return sorted(files) - def _should_include_file(self, filename: str, include_ext: List[str] = None, exclude_ext: List[str] = None) -> bool: + def _should_include_file( + self, + filename: str, + include_ext: list[str] | None = None, + exclude_ext: list[str] | None = None, + ) -> bool: """Determine if a file should be included based on criteria.""" - if filename.startswith('.'): + if filename.startswith("."): return False _, ext = os.path.splitext(filename.lower()) if include_ext: - if ext not in [e.lower() if e.startswith('.') else f'.{e.lower()}' for e in include_ext]: + if ext not in [ + e.lower() if e.startswith(".") else f".{e.lower()}" for e in include_ext + ]: return False if exclude_ext: - if ext in [e.lower() if e.startswith('.') else f'.{e.lower()}' for e in exclude_ext]: + if ext in [ + e.lower() if e.startswith(".") else f".{e.lower()}" for e in exclude_ext + ]: return False return True @@ -132,11 +154,13 @@ class 
DirectoryLoader(BaseLoader): if result.metadata is None: result.metadata = {} - result.metadata.update({ - "file_path": file_path, - "file_size": os.path.getsize(file_path), - "data_type": str(data_type), - "loader_type": loader.__class__.__name__ - }) + result.metadata.update( + { + "file_path": file_path, + "file_size": os.path.getsize(file_path), + "data_type": str(data_type), + "loader_type": loader.__class__.__name__, + } + ) return result diff --git a/packages/tools/src/crewai_tools/rag/loaders/docs_site_loader.py b/packages/tools/src/crewai_tools/rag/loaders/docs_site_loader.py new file mode 100644 index 000000000..6e828f830 --- /dev/null +++ b/packages/tools/src/crewai_tools/rag/loaders/docs_site_loader.py @@ -0,0 +1,106 @@ +"""Documentation site loader.""" + +from urllib.parse import urljoin, urlparse + +import requests +from bs4 import BeautifulSoup + +from crewai_tools.rag.base_loader import BaseLoader, LoaderResult +from crewai_tools.rag.source_content import SourceContent + + +class DocsSiteLoader(BaseLoader): + """Loader for documentation websites.""" + + def load(self, source: SourceContent, **kwargs) -> LoaderResult: + """Load content from a documentation site. + + Args: + source: Documentation site URL + **kwargs: Additional arguments + + Returns: + LoaderResult with documentation content + """ + docs_url = source.source + + try: + response = requests.get(docs_url, timeout=30) + response.raise_for_status() + except requests.RequestException as e: + raise ValueError(f"Unable to fetch documentation from {docs_url}: {e}") + + soup = BeautifulSoup(response.text, "html.parser") + + for script in soup(["script", "style"]): + script.decompose() + + title = soup.find("title") + title_text = title.get_text(strip=True) if title else "Documentation" + + main_content = None + for selector in [ + "main", + "article", + '[role="main"]', + ".content", + "#content", + ".documentation", + ]: + main_content = soup.select_one(selector) + if main_content: + break + + if not main_content: + main_content = soup.find("body") + + if not main_content: + raise ValueError( + f"Unable to extract content from documentation site: {docs_url}" + ) + + text_parts = [f"Title: {title_text}", ""] + + headings = main_content.find_all(["h1", "h2", "h3"]) + if headings: + text_parts.append("Table of Contents:") + for heading in headings[:15]: + level = int(heading.name[1]) + indent = " " * (level - 1) + text_parts.append(f"{indent}- {heading.get_text(strip=True)}") + text_parts.append("") + + text = main_content.get_text(separator="\n", strip=True) + lines = [line.strip() for line in text.split("\n") if line.strip()] + text_parts.extend(lines) + + nav_links = [] + for nav_selector in ["nav", ".sidebar", ".toc", ".navigation"]: + nav = soup.select_one(nav_selector) + if nav: + links = nav.find_all("a", href=True) + for link in links[:20]: + href = link["href"] + if not href.startswith(("http://", "https://", "mailto:", "#")): + full_url = urljoin(docs_url, href) + nav_links.append(f"- {link.get_text(strip=True)}: {full_url}") + + if nav_links: + text_parts.append("") + text_parts.append("Related documentation pages:") + text_parts.extend(nav_links[:10]) + + content = "\n".join(text_parts) + + if len(content) > 100000: + content = content[:100000] + "\n\n[Content truncated...]" + + return LoaderResult( + content=content, + metadata={ + "source": docs_url, + "title": title_text, + "domain": urlparse(docs_url).netloc, + }, + doc_id=self.generate_doc_id(source_ref=docs_url, content=content), + ) diff --git 
a/packages/tools/src/crewai_tools/rag/loaders/docx_loader.py b/packages/tools/src/crewai_tools/rag/loaders/docx_loader.py index 2f5df23af..f1a589b68 100644 --- a/packages/tools/src/crewai_tools/rag/loaders/docx_loader.py +++ b/packages/tools/src/crewai_tools/rag/loaders/docx_loader.py @@ -10,7 +10,9 @@ class DOCXLoader(BaseLoader): try: from docx import Document as DocxDocument except ImportError: - raise ImportError("python-docx is required for DOCX loading. Install with: 'uv pip install python-docx' or pip install crewai-tools[rag]") + raise ImportError( + "python-docx is required for DOCX loading. Install with: 'uv pip install python-docx' or pip install crewai-tools[rag]" + ) source_ref = source_content.source_ref @@ -23,28 +25,35 @@ class DOCXLoader(BaseLoader): elif source_content.path_exists(): return self._load_from_file(source_ref, source_ref, DocxDocument) else: - raise ValueError(f"Source must be a valid file path or URL, got: {source_content.source}") + raise ValueError( + f"Source must be a valid file path or URL, got: {source_content.source}" + ) def _download_from_url(self, url: str, kwargs: dict) -> str: import requests - headers = kwargs.get("headers", { - "Accept": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", - "User-Agent": "Mozilla/5.0 (compatible; crewai-tools DOCXLoader)" - }) + headers = kwargs.get( + "headers", + { + "Accept": "application/vnd.openxmlformats-officedocument.wordprocessingml.document", + "User-Agent": "Mozilla/5.0 (compatible; crewai-tools DOCXLoader)", + }, + ) try: response = requests.get(url, headers=headers, timeout=30) response.raise_for_status() # Create temporary file to save the DOCX content - with tempfile.NamedTemporaryFile(suffix='.docx', delete=False) as temp_file: + with tempfile.NamedTemporaryFile(suffix=".docx", delete=False) as temp_file: temp_file.write(response.content) return temp_file.name except Exception as e: - raise ValueError(f"Error fetching DOCX from URL {url}: {str(e)}") + raise ValueError(f"Error fetching DOCX from URL {url}: {e!s}") - def _load_from_file(self, file_path: str, source_ref: str, DocxDocument) -> LoaderResult: + def _load_from_file( + self, file_path: str, source_ref: str, DocxDocument + ) -> LoaderResult: try: doc = DocxDocument(file_path) @@ -58,15 +67,15 @@ class DOCXLoader(BaseLoader): metadata = { "format": "docx", "paragraphs": len(doc.paragraphs), - "tables": len(doc.tables) + "tables": len(doc.tables), } return LoaderResult( content=content, source=source_ref, metadata=metadata, - doc_id=self.generate_doc_id(source_ref=source_ref, content=content) + doc_id=self.generate_doc_id(source_ref=source_ref, content=content), ) except Exception as e: - raise ValueError(f"Error loading DOCX file: {str(e)}") + raise ValueError(f"Error loading DOCX file: {e!s}") diff --git a/packages/tools/src/crewai_tools/rag/loaders/github_loader.py b/packages/tools/src/crewai_tools/rag/loaders/github_loader.py new file mode 100644 index 000000000..367f7d789 --- /dev/null +++ b/packages/tools/src/crewai_tools/rag/loaders/github_loader.py @@ -0,0 +1,112 @@ +"""GitHub repository content loader.""" + +from github import Github, GithubException + +from crewai_tools.rag.base_loader import BaseLoader, LoaderResult +from crewai_tools.rag.source_content import SourceContent + + +class GithubLoader(BaseLoader): + """Loader for GitHub repository content.""" + + def load(self, source: SourceContent, **kwargs) -> LoaderResult: + """Load content from a GitHub repository. 
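A rough invocation sketch for this loader (the repository URL and content types are placeholders; the loader imports PyGithub's Github client, so that package must be installed):

from crewai_tools.rag.loaders.github_loader import GithubLoader
from crewai_tools.rag.source_content import SourceContent

loader = GithubLoader()
result = loader.load(
    SourceContent("https://github.com/crewAIInc/crewAI"),
    # content_types may include "repo", "code", "pr", "issue"; add a "gh_token" entry for authenticated access.
    metadata={"content_types": ["repo", "code", "issue"]},
)
print(result.metadata["repo"], len(result.content))
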
+ + Args: + source: GitHub repository URL + **kwargs: Additional arguments including gh_token and content_types + + Returns: + LoaderResult with repository content + """ + metadata = kwargs.get("metadata", {}) + gh_token = metadata.get("gh_token") + content_types = metadata.get("content_types", ["code", "repo"]) + + repo_url = source.source + if not repo_url.startswith("https://github.com/"): + raise ValueError(f"Invalid GitHub URL: {repo_url}") + + parts = repo_url.replace("https://github.com/", "").strip("/").split("/") + if len(parts) < 2: + raise ValueError(f"Invalid GitHub repository URL: {repo_url}") + + repo_name = f"{parts[0]}/{parts[1]}" + + g = Github(gh_token) if gh_token else Github() + + try: + repo = g.get_repo(repo_name) + except GithubException as e: + raise ValueError(f"Unable to access repository {repo_name}: {e}") + + all_content = [] + + if "repo" in content_types: + all_content.append(f"Repository: {repo.full_name}") + all_content.append(f"Description: {repo.description or 'No description'}") + all_content.append(f"Language: {repo.language or 'Not specified'}") + all_content.append(f"Stars: {repo.stargazers_count}") + all_content.append(f"Forks: {repo.forks_count}") + all_content.append("") + + if "code" in content_types: + try: + readme = repo.get_readme() + all_content.append("README:") + all_content.append( + readme.decoded_content.decode("utf-8", errors="ignore") + ) + all_content.append("") + except GithubException: + pass + + try: + contents = repo.get_contents("") + if isinstance(contents, list): + all_content.append("Repository structure:") + for content_file in contents[:20]: + all_content.append( + f"- {content_file.path} ({content_file.type})" + ) + all_content.append("") + except GithubException: + pass + + if "pr" in content_types: + prs = repo.get_pulls(state="open") + pr_list = list(prs[:5]) + if pr_list: + all_content.append("Recent Pull Requests:") + for pr in pr_list: + all_content.append(f"- PR #{pr.number}: {pr.title}") + if pr.body: + body_preview = pr.body[:200].replace("\n", " ") + all_content.append(f" {body_preview}") + all_content.append("") + + if "issue" in content_types: + issues = repo.get_issues(state="open") + issue_list = [i for i in list(issues[:10]) if not i.pull_request][:5] + if issue_list: + all_content.append("Recent Issues:") + for issue in issue_list: + all_content.append(f"- Issue #{issue.number}: {issue.title}") + if issue.body: + body_preview = issue.body[:200].replace("\n", " ") + all_content.append(f" {body_preview}") + all_content.append("") + + if not all_content: + raise ValueError(f"No content could be loaded from repository: {repo_url}") + + content = "\n".join(all_content) + return LoaderResult( + content=content, + metadata={ + "source": repo_url, + "repo": repo_name, + "content_types": content_types, + }, + doc_id=self.generate_doc_id(source_ref=repo_url, content=content), + ) diff --git a/packages/tools/src/crewai_tools/rag/loaders/json_loader.py b/packages/tools/src/crewai_tools/rag/loaders/json_loader.py index 6efab393a..1bebba8c2 100644 --- a/packages/tools/src/crewai_tools/rag/loaders/json_loader.py +++ b/packages/tools/src/crewai_tools/rag/loaders/json_loader.py @@ -1,7 +1,7 @@ import json -from crewai_tools.rag.source_content import SourceContent from crewai_tools.rag.base_loader import BaseLoader, LoaderResult +from crewai_tools.rag.source_content import SourceContent class JSONLoader(BaseLoader): @@ -19,17 +19,24 @@ class JSONLoader(BaseLoader): def _load_from_url(self, url: str, kwargs: dict) -> str: 
import requests - headers = kwargs.get("headers", { - "Accept": "application/json", - "User-Agent": "Mozilla/5.0 (compatible; crewai-tools JSONLoader)" - }) + headers = kwargs.get( + "headers", + { + "Accept": "application/json", + "User-Agent": "Mozilla/5.0 (compatible; crewai-tools JSONLoader)", + }, + ) try: response = requests.get(url, headers=headers, timeout=30) response.raise_for_status() - return response.text if not self._is_json_response(response) else json.dumps(response.json(), indent=2) + return ( + response.text + if not self._is_json_response(response) + else json.dumps(response.json(), indent=2) + ) except Exception as e: - raise ValueError(f"Error fetching JSON from URL {url}: {str(e)}") + raise ValueError(f"Error fetching JSON from URL {url}: {e!s}") def _is_json_response(self, response) -> bool: try: @@ -46,7 +53,9 @@ class JSONLoader(BaseLoader): try: data = json.loads(content) if isinstance(data, dict): - text = "\n".join(f"{k}: {json.dumps(v, indent=0)}" for k, v in data.items()) + text = "\n".join( + f"{k}: {json.dumps(v, indent=0)}" for k, v in data.items() + ) elif isinstance(data, list): text = "\n".join(json.dumps(item, indent=0) for item in data) else: @@ -55,7 +64,7 @@ class JSONLoader(BaseLoader): metadata = { "format": "json", "type": type(data).__name__, - "size": len(data) if isinstance(data, (list, dict)) else 1 + "size": len(data) if isinstance(data, (list, dict)) else 1, } except json.JSONDecodeError as e: text = content @@ -65,5 +74,5 @@ class JSONLoader(BaseLoader): content=text, source=source_ref, metadata=metadata, - doc_id=self.generate_doc_id(source_ref=source_ref, content=text) + doc_id=self.generate_doc_id(source_ref=source_ref, content=text), ) diff --git a/packages/tools/src/crewai_tools/rag/loaders/mdx_loader.py b/packages/tools/src/crewai_tools/rag/loaders/mdx_loader.py index 6da9dc896..5b6e39933 100644 --- a/packages/tools/src/crewai_tools/rag/loaders/mdx_loader.py +++ b/packages/tools/src/crewai_tools/rag/loaders/mdx_loader.py @@ -3,6 +3,7 @@ import re from crewai_tools.rag.base_loader import BaseLoader, LoaderResult from crewai_tools.rag.source_content import SourceContent + class MDXLoader(BaseLoader): def load(self, source_content: SourceContent, **kwargs) -> LoaderResult: source_ref = source_content.source_ref @@ -18,17 +19,20 @@ class MDXLoader(BaseLoader): def _load_from_url(self, url: str, kwargs: dict) -> str: import requests - headers = kwargs.get("headers", { - "Accept": "text/markdown, text/x-markdown, text/plain", - "User-Agent": "Mozilla/5.0 (compatible; crewai-tools MDXLoader)" - }) + headers = kwargs.get( + "headers", + { + "Accept": "text/markdown, text/x-markdown, text/plain", + "User-Agent": "Mozilla/5.0 (compatible; crewai-tools MDXLoader)", + }, + ) try: response = requests.get(url, headers=headers, timeout=30) response.raise_for_status() return response.text except Exception as e: - raise ValueError(f"Error fetching MDX from URL {url}: {str(e)}") + raise ValueError(f"Error fetching MDX from URL {url}: {e!s}") def _load_from_file(self, path: str) -> str: with open(path, "r", encoding="utf-8") as file: @@ -38,16 +42,20 @@ class MDXLoader(BaseLoader): cleaned_content = content # Remove import statements - cleaned_content = re.sub(r'^import\s+.*?\n', '', cleaned_content, flags=re.MULTILINE) + cleaned_content = re.sub( + r"^import\s+.*?\n", "", cleaned_content, flags=re.MULTILINE + ) # Remove export statements - cleaned_content = re.sub(r'^export\s+.*?(?:\n|$)', '', cleaned_content, flags=re.MULTILINE) + cleaned_content = 
re.sub( + r"^export\s+.*?(?:\n|$)", "", cleaned_content, flags=re.MULTILINE + ) # Remove JSX tags (simple approach) - cleaned_content = re.sub(r'<[^>]+>', '', cleaned_content) + cleaned_content = re.sub(r"<[^>]+>", "", cleaned_content) # Clean up extra whitespace - cleaned_content = re.sub(r'\n\s*\n\s*\n', '\n\n', cleaned_content) + cleaned_content = re.sub(r"\n\s*\n\s*\n", "\n\n", cleaned_content) cleaned_content = cleaned_content.strip() metadata = {"format": "mdx"} @@ -55,5 +63,5 @@ class MDXLoader(BaseLoader): content=cleaned_content, source=source_ref, metadata=metadata, - doc_id=self.generate_doc_id(source_ref=source_ref, content=cleaned_content) + doc_id=self.generate_doc_id(source_ref=source_ref, content=cleaned_content), ) diff --git a/packages/tools/src/crewai_tools/rag/loaders/mysql_loader.py b/packages/tools/src/crewai_tools/rag/loaders/mysql_loader.py new file mode 100644 index 000000000..c7a077d40 --- /dev/null +++ b/packages/tools/src/crewai_tools/rag/loaders/mysql_loader.py @@ -0,0 +1,100 @@ +"""MySQL database loader.""" + +from urllib.parse import urlparse + +import pymysql + +from crewai_tools.rag.base_loader import BaseLoader, LoaderResult +from crewai_tools.rag.source_content import SourceContent + + +class MySQLLoader(BaseLoader): + """Loader for MySQL database content.""" + + def load(self, source: SourceContent, **kwargs) -> LoaderResult: + """Load content from a MySQL database table. + + Args: + source: SQL query (e.g., "SELECT * FROM table_name") + **kwargs: Additional arguments including db_uri + + Returns: + LoaderResult with database content + """ + metadata = kwargs.get("metadata", {}) + db_uri = metadata.get("db_uri") + + if not db_uri: + raise ValueError("Database URI is required for MySQL loader") + + query = source.source + + parsed = urlparse(db_uri) + if parsed.scheme not in ["mysql", "mysql+pymysql"]: + raise ValueError(f"Invalid MySQL URI scheme: {parsed.scheme}") + + connection_params = { + "host": parsed.hostname or "localhost", + "port": parsed.port or 3306, + "user": parsed.username, + "password": parsed.password, + "database": parsed.path.lstrip("/") if parsed.path else None, + "charset": "utf8mb4", + "cursorclass": pymysql.cursors.DictCursor, + } + + if not connection_params["database"]: + raise ValueError("Database name is required in the URI") + + try: + connection = pymysql.connect(**connection_params) + try: + with connection.cursor() as cursor: + cursor.execute(query) + rows = cursor.fetchall() + + if not rows: + content = "No data found in the table" + return LoaderResult( + content=content, + metadata={"source": query, "row_count": 0}, + doc_id=self.generate_doc_id( + source_ref=query, content=content + ), + ) + + text_parts = [] + + columns = list(rows[0].keys()) + text_parts.append(f"Columns: {', '.join(columns)}") + text_parts.append(f"Total rows: {len(rows)}") + text_parts.append("") + + for i, row in enumerate(rows, 1): + text_parts.append(f"Row {i}:") + for col, val in row.items(): + if val is not None: + text_parts.append(f" {col}: {val}") + text_parts.append("") + + content = "\n".join(text_parts) + + if len(content) > 100000: + content = content[:100000] + "\n\n[Content truncated...]" + + return LoaderResult( + content=content, + metadata={ + "source": query, + "database": connection_params["database"], + "row_count": len(rows), + "columns": columns, + }, + doc_id=self.generate_doc_id(source_ref=query, content=content), + ) + finally: + connection.close() + except pymysql.Error as e: + raise ValueError(f"MySQL database error: {e}") 
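A hedged sketch of calling the new database loader directly (the URI, query, and table are placeholders; assumes pymysql is installed, as the loader imports it). The PostgresLoader introduced later in this diff follows the same pattern with a postgresql:// URI:

from crewai_tools.rag.loaders.mysql_loader import MySQLLoader
from crewai_tools.rag.source_content import SourceContent

result = MySQLLoader().load(
    SourceContent("SELECT id, title FROM articles LIMIT 10"),  # the SQL query is the source
    metadata={"db_uri": "mysql://user:secret@localhost:3306/blog"},
)
print(result.metadata["row_count"])
print(result.content[:200])
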
+ except Exception as e: + raise ValueError(f"Failed to load data from MySQL: {e}") diff --git a/packages/tools/src/crewai_tools/rag/loaders/pdf_loader.py b/packages/tools/src/crewai_tools/rag/loaders/pdf_loader.py new file mode 100644 index 000000000..2ca4a1561 --- /dev/null +++ b/packages/tools/src/crewai_tools/rag/loaders/pdf_loader.py @@ -0,0 +1,71 @@ +"""PDF loader for extracting text from PDF files.""" + +import os +from pathlib import Path +from typing import Any + +from crewai_tools.rag.base_loader import BaseLoader, LoaderResult +from crewai_tools.rag.source_content import SourceContent + + +class PDFLoader(BaseLoader): + """Loader for PDF files.""" + + def load(self, source: SourceContent, **kwargs) -> LoaderResult: + """Load and extract text from a PDF file. + + Args: + source: The source content containing the PDF file path + + Returns: + LoaderResult with extracted text content + + Raises: + FileNotFoundError: If the PDF file doesn't exist + ImportError: If required PDF libraries aren't installed + """ + try: + import pypdf + except ImportError: + try: + import PyPDF2 as pypdf + except ImportError: + raise ImportError( + "PDF support requires pypdf or PyPDF2. Install with: uv add pypdf" + ) + + file_path = source.source + + if not os.path.isfile(file_path): + raise FileNotFoundError(f"PDF file not found: {file_path}") + + text_content = [] + metadata: dict[str, Any] = { + "source": str(file_path), + "file_name": Path(file_path).name, + "file_type": "pdf", + } + + try: + with open(file_path, "rb") as file: + pdf_reader = pypdf.PdfReader(file) + metadata["num_pages"] = len(pdf_reader.pages) + + for page_num, page in enumerate(pdf_reader.pages, 1): + page_text = page.extract_text() + if page_text.strip(): + text_content.append(f"Page {page_num}:\n{page_text}") + except Exception as e: + raise ValueError(f"Error reading PDF file {file_path}: {e!s}") + + if not text_content: + content = f"[PDF file with no extractable text: {Path(file_path).name}]" + else: + content = "\n\n".join(text_content) + + return LoaderResult( + content=content, + source=str(file_path), + metadata=metadata, + doc_id=self.generate_doc_id(source_ref=str(file_path), content=content), + ) diff --git a/packages/tools/src/crewai_tools/rag/loaders/postgres_loader.py b/packages/tools/src/crewai_tools/rag/loaders/postgres_loader.py new file mode 100644 index 000000000..99a0309d7 --- /dev/null +++ b/packages/tools/src/crewai_tools/rag/loaders/postgres_loader.py @@ -0,0 +1,100 @@ +"""PostgreSQL database loader.""" + +from urllib.parse import urlparse + +import psycopg2 +from psycopg2.extras import RealDictCursor + +from crewai_tools.rag.base_loader import BaseLoader, LoaderResult +from crewai_tools.rag.source_content import SourceContent + + +class PostgresLoader(BaseLoader): + """Loader for PostgreSQL database content.""" + + def load(self, source: SourceContent, **kwargs) -> LoaderResult: + """Load content from a PostgreSQL database table. 
+ + Args: + source: SQL query (e.g., "SELECT * FROM table_name") + **kwargs: Additional arguments including db_uri + + Returns: + LoaderResult with database content + """ + metadata = kwargs.get("metadata", {}) + db_uri = metadata.get("db_uri") + + if not db_uri: + raise ValueError("Database URI is required for PostgreSQL loader") + + query = source.source + + parsed = urlparse(db_uri) + if parsed.scheme not in ["postgresql", "postgres", "postgresql+psycopg2"]: + raise ValueError(f"Invalid PostgreSQL URI scheme: {parsed.scheme}") + + connection_params = { + "host": parsed.hostname or "localhost", + "port": parsed.port or 5432, + "user": parsed.username, + "password": parsed.password, + "database": parsed.path.lstrip("/") if parsed.path else None, + "cursor_factory": RealDictCursor, + } + + if not connection_params["database"]: + raise ValueError("Database name is required in the URI") + + try: + connection = psycopg2.connect(**connection_params) + try: + with connection.cursor() as cursor: + cursor.execute(query) + rows = cursor.fetchall() + + if not rows: + content = "No data found in the table" + return LoaderResult( + content=content, + metadata={"source": query, "row_count": 0}, + doc_id=self.generate_doc_id( + source_ref=query, content=content + ), + ) + + text_parts = [] + + columns = list(rows[0].keys()) + text_parts.append(f"Columns: {', '.join(columns)}") + text_parts.append(f"Total rows: {len(rows)}") + text_parts.append("") + + for i, row in enumerate(rows, 1): + text_parts.append(f"Row {i}:") + for col, val in row.items(): + if val is not None: + text_parts.append(f" {col}: {val}") + text_parts.append("") + + content = "\n".join(text_parts) + + if len(content) > 100000: + content = content[:100000] + "\n\n[Content truncated...]" + + return LoaderResult( + content=content, + metadata={ + "source": query, + "database": connection_params["database"], + "row_count": len(rows), + "columns": columns, + }, + doc_id=self.generate_doc_id(source_ref=query, content=content), + ) + finally: + connection.close() + except psycopg2.Error as e: + raise ValueError(f"PostgreSQL database error: {e}") + except Exception as e: + raise ValueError(f"Failed to load data from PostgreSQL: {e}") diff --git a/packages/tools/src/crewai_tools/rag/loaders/webpage_loader.py b/packages/tools/src/crewai_tools/rag/loaders/webpage_loader.py index 4fcb1e0c4..aec537f26 100644 --- a/packages/tools/src/crewai_tools/rag/loaders/webpage_loader.py +++ b/packages/tools/src/crewai_tools/rag/loaders/webpage_loader.py @@ -1,18 +1,23 @@ import re + import requests from bs4 import BeautifulSoup from crewai_tools.rag.base_loader import BaseLoader, LoaderResult from crewai_tools.rag.source_content import SourceContent + class WebPageLoader(BaseLoader): def load(self, source_content: SourceContent, **kwargs) -> LoaderResult: url = source_content.source - headers = kwargs.get("headers", { - "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36", - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", - "Accept-Language": "en-US,en;q=0.9", - }) + headers = kwargs.get( + "headers", + { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36", + "Accept": 
"text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", + "Accept-Language": "en-US,en;q=0.9", + }, + ) try: response = requests.get(url, timeout=15, headers=headers) @@ -28,20 +33,22 @@ class WebPageLoader(BaseLoader): text = re.sub("\\s+\n\\s+", "\n", text) text = text.strip() - title = soup.title.string.strip() if soup.title and soup.title.string else "" + title = ( + soup.title.string.strip() if soup.title and soup.title.string else "" + ) metadata = { "url": url, "title": title, "status_code": response.status_code, - "content_type": response.headers.get("content-type", "") + "content_type": response.headers.get("content-type", ""), } return LoaderResult( content=text, source=url, metadata=metadata, - doc_id=self.generate_doc_id(source_ref=url, content=text) + doc_id=self.generate_doc_id(source_ref=url, content=text), ) except Exception as e: - raise ValueError(f"Error loading webpage {url}: {str(e)}") + raise ValueError(f"Error loading webpage {url}: {e!s}") diff --git a/packages/tools/src/crewai_tools/rag/loaders/xml_loader.py b/packages/tools/src/crewai_tools/rag/loaders/xml_loader.py index ffafdb9d9..ffac37f71 100644 --- a/packages/tools/src/crewai_tools/rag/loaders/xml_loader.py +++ b/packages/tools/src/crewai_tools/rag/loaders/xml_loader.py @@ -1,9 +1,9 @@ -import os import xml.etree.ElementTree as ET from crewai_tools.rag.base_loader import BaseLoader, LoaderResult from crewai_tools.rag.source_content import SourceContent + class XMLLoader(BaseLoader): def load(self, source_content: SourceContent, **kwargs) -> LoaderResult: source_ref = source_content.source_ref @@ -11,7 +11,7 @@ class XMLLoader(BaseLoader): if source_content.is_url(): content = self._load_from_url(source_ref, kwargs) - elif os.path.exists(source_ref): + elif source_content.path_exists(): content = self._load_from_file(source_ref) return self._parse_xml(content, source_ref) @@ -19,17 +19,20 @@ class XMLLoader(BaseLoader): def _load_from_url(self, url: str, kwargs: dict) -> str: import requests - headers = kwargs.get("headers", { - "Accept": "application/xml, text/xml, text/plain", - "User-Agent": "Mozilla/5.0 (compatible; crewai-tools XMLLoader)" - }) + headers = kwargs.get( + "headers", + { + "Accept": "application/xml, text/xml, text/plain", + "User-Agent": "Mozilla/5.0 (compatible; crewai-tools XMLLoader)", + }, + ) try: response = requests.get(url, headers=headers, timeout=30) response.raise_for_status() return response.text except Exception as e: - raise ValueError(f"Error fetching XML from URL {url}: {str(e)}") + raise ValueError(f"Error fetching XML from URL {url}: {e!s}") def _load_from_file(self, path: str) -> str: with open(path, "r", encoding="utf-8") as file: @@ -37,7 +40,7 @@ class XMLLoader(BaseLoader): def _parse_xml(self, content: str, source_ref: str) -> LoaderResult: try: - if content.strip().startswith('<'): + if content.strip().startswith("<"): root = ET.fromstring(content) else: root = ET.parse(source_ref).getroot() @@ -57,5 +60,5 @@ class XMLLoader(BaseLoader): content=text, source=source_ref, metadata=metadata, - doc_id=self.generate_doc_id(source_ref=source_ref, content=text) + doc_id=self.generate_doc_id(source_ref=source_ref, content=text), ) diff --git a/packages/tools/src/crewai_tools/rag/loaders/youtube_channel_loader.py b/packages/tools/src/crewai_tools/rag/loaders/youtube_channel_loader.py new file mode 100644 index 000000000..c2d9c2ad1 --- /dev/null +++ 
b/packages/tools/src/crewai_tools/rag/loaders/youtube_channel_loader.py @@ -0,0 +1,162 @@ +"""YouTube channel loader for extracting content from YouTube channels.""" + +import re +from typing import Any + +from crewai_tools.rag.base_loader import BaseLoader, LoaderResult +from crewai_tools.rag.source_content import SourceContent + + +class YoutubeChannelLoader(BaseLoader): + """Loader for YouTube channels.""" + + def load(self, source: SourceContent, **kwargs) -> LoaderResult: + """Load and extract content from a YouTube channel. + + Args: + source: The source content containing the YouTube channel URL + + Returns: + LoaderResult with channel content + + Raises: + ImportError: If required YouTube libraries aren't installed + ValueError: If the URL is not a valid YouTube channel URL + """ + try: + from pytube import Channel + except ImportError: + raise ImportError( + "YouTube channel support requires pytube. Install with: uv add pytube" + ) + + channel_url = source.source + + if not any( + pattern in channel_url + for pattern in [ + "youtube.com/channel/", + "youtube.com/c/", + "youtube.com/@", + "youtube.com/user/", + ] + ): + raise ValueError(f"Invalid YouTube channel URL: {channel_url}") + + metadata: dict[str, Any] = { + "source": channel_url, + "data_type": "youtube_channel", + } + + try: + channel = Channel(channel_url) + + metadata["channel_name"] = channel.channel_name + metadata["channel_id"] = channel.channel_id + + max_videos = kwargs.get("max_videos", 10) + video_urls = list(channel.video_urls)[:max_videos] + metadata["num_videos_loaded"] = len(video_urls) + metadata["total_videos"] = len(list(channel.video_urls)) + + content_parts = [ + f"YouTube Channel: {channel.channel_name}", + f"Channel ID: {channel.channel_id}", + f"Total Videos: {metadata['total_videos']}", + f"Videos Loaded: {metadata['num_videos_loaded']}", + "\n--- Video Summaries ---\n", + ] + + try: + from pytube import YouTube + from youtube_transcript_api import YouTubeTranscriptApi + + for i, video_url in enumerate(video_urls, 1): + try: + video_id = self._extract_video_id(video_url) + if not video_id: + continue + yt = YouTube(video_url) + title = yt.title or f"Video {i}" + description = ( + yt.description[:200] if yt.description else "No description" + ) + + content_parts.append(f"\n{i}. {title}") + content_parts.append(f" URL: {video_url}") + content_parts.append(f" Description: {description}...") + + try: + api = YouTubeTranscriptApi() + transcript_list = api.list(video_id) + transcript = None + + try: + transcript = transcript_list.find_transcript(["en"]) + except: + try: + transcript = ( + transcript_list.find_generated_transcript( + ["en"] + ) + ) + except: + transcript = next(iter(transcript_list), None) + + if transcript: + transcript_data = transcript.fetch() + text_parts = [] + char_count = 0 + for entry in transcript_data: + text = ( + entry.text.strip() + if hasattr(entry, "text") + else "" + ) + if text: + text_parts.append(text) + char_count += len(text) + if char_count > 500: + break + + if text_parts: + preview = " ".join(text_parts)[:500] + content_parts.append( + f" Transcript Preview: {preview}..." + ) + except: + content_parts.append(" Transcript: Not available") + + except Exception as e: + content_parts.append(f"\n{i}. Error loading video: {e!s}") + + except ImportError: + for i, video_url in enumerate(video_urls, 1): + content_parts.append(f"\n{i}. 
{video_url}") + + content = "\n".join(content_parts) + + except Exception as e: + raise ValueError( + f"Unable to load YouTube channel {channel_url}: {e!s}" + ) from e + + return LoaderResult( + content=content, + source=channel_url, + metadata=metadata, + doc_id=self.generate_doc_id(source_ref=channel_url, content=content), + ) + + def _extract_video_id(self, url: str) -> str | None: + """Extract video ID from YouTube URL.""" + patterns = [ + r"(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/|youtube\.com\/v\/)([^&\n?#]+)", + ] + + for pattern in patterns: + match = re.search(pattern, url) + if match: + return match.group(1) + + return None diff --git a/packages/tools/src/crewai_tools/rag/loaders/youtube_video_loader.py b/packages/tools/src/crewai_tools/rag/loaders/youtube_video_loader.py new file mode 100644 index 000000000..a0f20aa23 --- /dev/null +++ b/packages/tools/src/crewai_tools/rag/loaders/youtube_video_loader.py @@ -0,0 +1,134 @@ +"""YouTube video loader for extracting transcripts from YouTube videos.""" + +import re +from typing import Any +from urllib.parse import parse_qs, urlparse + +from crewai_tools.rag.base_loader import BaseLoader, LoaderResult +from crewai_tools.rag.source_content import SourceContent + + +class YoutubeVideoLoader(BaseLoader): + """Loader for YouTube videos.""" + + def load(self, source: SourceContent, **kwargs) -> LoaderResult: + """Load and extract transcript from a YouTube video. + + Args: + source: The source content containing the YouTube URL + + Returns: + LoaderResult with transcript content + + Raises: + ImportError: If required YouTube libraries aren't installed + ValueError: If the URL is not a valid YouTube video URL + """ + try: + from youtube_transcript_api import YouTubeTranscriptApi + except ImportError: + raise ImportError( + "YouTube support requires youtube-transcript-api. 
" + "Install with: uv add youtube-transcript-api" + ) + + video_url = source.source + video_id = self._extract_video_id(video_url) + + if not video_id: + raise ValueError(f"Invalid YouTube URL: {video_url}") + + metadata: dict[str, Any] = { + "source": video_url, + "video_id": video_id, + "data_type": "youtube_video", + } + + try: + api = YouTubeTranscriptApi() + transcript_list = api.list(video_id) + + transcript = None + try: + transcript = transcript_list.find_transcript(["en"]) + except: + try: + transcript = transcript_list.find_generated_transcript(["en"]) + except: + transcript = next(iter(transcript_list)) + + if transcript: + metadata["language"] = transcript.language + metadata["is_generated"] = transcript.is_generated + + transcript_data = transcript.fetch() + + text_content = [] + for entry in transcript_data: + text = entry.text.strip() if hasattr(entry, "text") else "" + if text: + text_content.append(text) + + content = " ".join(text_content) + + try: + from pytube import YouTube + + yt = YouTube(video_url) + metadata["title"] = yt.title + metadata["author"] = yt.author + metadata["length_seconds"] = yt.length + metadata["description"] = ( + yt.description[:500] if yt.description else None + ) + + if yt.title: + content = f"Title: {yt.title}\n\nAuthor: {yt.author or 'Unknown'}\n\nTranscript:\n{content}" + except: + pass + else: + raise ValueError( + f"No transcript available for YouTube video: {video_id}" + ) + + except Exception as e: + raise ValueError( + f"Unable to extract transcript from YouTube video {video_id}: {e!s}" + ) from e + + return LoaderResult( + content=content, + source=video_url, + metadata=metadata, + doc_id=self.generate_doc_id(source_ref=video_url, content=content), + ) + + def _extract_video_id(self, url: str) -> str | None: + """Extract video ID from various YouTube URL formats.""" + patterns = [ + r"(?:youtube\.com\/watch\?v=|youtu\.be\/|youtube\.com\/embed\/|youtube\.com\/v\/)([^&\n?#]+)", + ] + + for pattern in patterns: + match = re.search(pattern, url) + if match: + return match.group(1) + + try: + parsed = urlparse(url) + hostname = parsed.hostname + if hostname: + hostname_lower = hostname.lower() + # Allow youtube.com and any subdomain of youtube.com, plus youtu.be shortener + if ( + hostname_lower == "youtube.com" + or hostname_lower.endswith(".youtube.com") + or hostname_lower == "youtu.be" + ): + query_params = parse_qs(parsed.query) + if "v" in query_params: + return query_params["v"][0] + except: + pass + + return None diff --git a/packages/tools/src/crewai_tools/rag/misc.py b/packages/tools/src/crewai_tools/rag/misc.py index 5b95f804e..c508238e9 100644 --- a/packages/tools/src/crewai_tools/rag/misc.py +++ b/packages/tools/src/crewai_tools/rag/misc.py @@ -1,4 +1,31 @@ import hashlib +from typing import Any + def compute_sha256(content: str) -> str: return hashlib.sha256(content.encode("utf-8")).hexdigest() + + +def sanitize_metadata_for_chromadb(metadata: dict[str, Any]) -> dict[str, Any]: + """Sanitize metadata to ensure ChromaDB compatibility. + + ChromaDB only accepts str, int, float, or bool values in metadata. + This function converts other types to strings. 
+ + Args: + metadata: Dictionary of metadata to sanitize + + Returns: + Sanitized metadata dictionary with only ChromaDB-compatible types + """ + sanitized = {} + for key, value in metadata.items(): + if isinstance(value, (str, int, float, bool)) or value is None: + sanitized[key] = value + elif isinstance(value, (list, tuple)): + # Convert lists/tuples to pipe-separated strings + sanitized[key] = " | ".join(str(v) for v in value) + else: + # Convert other types to string + sanitized[key] = str(value) + return sanitized diff --git a/packages/tools/src/crewai_tools/rag/source_content.py b/packages/tools/src/crewai_tools/rag/source_content.py index 59530c8d8..3abbc1eaa 100644 --- a/packages/tools/src/crewai_tools/rag/source_content.py +++ b/packages/tools/src/crewai_tools/rag/source_content.py @@ -1,8 +1,8 @@ import os -from urllib.parse import urlparse -from typing import TYPE_CHECKING -from pathlib import Path from functools import cached_property +from pathlib import Path +from typing import TYPE_CHECKING +from urllib.parse import urlparse from crewai_tools.rag.misc import compute_sha256 @@ -34,7 +34,7 @@ class SourceContent: @cached_property def source_ref(self) -> str: - """" + """ " Returns the source reference for the content. If the content is a URL or a local file, returns the source. Otherwise, returns the hash of the content. diff --git a/packages/tools/src/crewai_tools/tools/__init__.py b/packages/tools/src/crewai_tools/tools/__init__.py index 2b0bb968a..3e36b8dd0 100644 --- a/packages/tools/src/crewai_tools/tools/__init__.py +++ b/packages/tools/src/crewai_tools/tools/__init__.py @@ -70,6 +70,9 @@ from .oxylabs_google_search_scraper_tool.oxylabs_google_search_scraper_tool impo from .oxylabs_universal_scraper_tool.oxylabs_universal_scraper_tool import ( OxylabsUniversalScraperTool, ) +from .parallel_tools import ( + ParallelSearchTool, +) from .patronus_eval_tool import ( PatronusEvalTool, PatronusLocalEvaluatorTool, @@ -122,6 +125,3 @@ from .youtube_channel_search_tool.youtube_channel_search_tool import ( ) from .youtube_video_search_tool.youtube_video_search_tool import YoutubeVideoSearchTool from .zapier_action_tool.zapier_action_tool import ZapierActionTools -from .parallel_tools import ( - ParallelSearchTool, -) diff --git a/packages/tools/src/crewai_tools/tools/ai_mind_tool/ai_mind_tool.py b/packages/tools/src/crewai_tools/tools/ai_mind_tool/ai_mind_tool.py index ea6b19281..257fc74a3 100644 --- a/packages/tools/src/crewai_tools/tools/ai_mind_tool/ai_mind_tool.py +++ b/packages/tools/src/crewai_tools/tools/ai_mind_tool/ai_mind_tool.py @@ -1,6 +1,6 @@ import os import secrets -from typing import Any, Dict, List, Optional, Type +from typing import Any from crewai.tools import BaseTool, EnvVar from openai import OpenAI @@ -28,20 +28,22 @@ class AIMindTool(BaseTool): "and Google BigQuery. " "Input should be a question in natural language." 
) - args_schema: Type[BaseModel] = AIMindToolInputSchema - api_key: Optional[str] = None - datasources: Optional[List[Dict[str, Any]]] = None - mind_name: Optional[str] = None - package_dependencies: List[str] = ["minds-sdk"] - env_vars: List[EnvVar] = [ + args_schema: type[BaseModel] = AIMindToolInputSchema + api_key: str | None = None + datasources: list[dict[str, Any]] | None = None + mind_name: str | None = None + package_dependencies: list[str] = ["minds-sdk"] + env_vars: list[EnvVar] = [ EnvVar(name="MINDS_API_KEY", description="API key for AI-Minds", required=True), ] - def __init__(self, api_key: Optional[str] = None, **kwargs): + def __init__(self, api_key: str | None = None, **kwargs): super().__init__(**kwargs) self.api_key = api_key or os.getenv("MINDS_API_KEY") if not self.api_key: - raise ValueError("API key must be provided either through constructor or MINDS_API_KEY environment variable") + raise ValueError( + "API key must be provided either through constructor or MINDS_API_KEY environment variable" + ) try: from minds.client import Client # type: ignore @@ -74,13 +76,12 @@ class AIMindTool(BaseTool): self.mind_name = mind.name - def _run( - self, - query: str - ): + def _run(self, query: str): # Run the query on the AI-Mind. # The Minds API is OpenAI compatible and therefore, the OpenAI client can be used. - openai_client = OpenAI(base_url=AIMindToolConstants.MINDS_API_BASE_URL, api_key=self.api_key) + openai_client = OpenAI( + base_url=AIMindToolConstants.MINDS_API_BASE_URL, api_key=self.api_key + ) completion = openai_client.chat.completions.create( model=self.mind_name, diff --git a/packages/tools/src/crewai_tools/tools/apify_actors_tool/apify_actors_tool.py b/packages/tools/src/crewai_tools/tools/apify_actors_tool/apify_actors_tool.py index 127169676..59545dbcb 100644 --- a/packages/tools/src/crewai_tools/tools/apify_actors_tool/apify_actors_tool.py +++ b/packages/tools/src/crewai_tools/tools/apify_actors_tool/apify_actors_tool.py @@ -1,14 +1,20 @@ +import os +from typing import TYPE_CHECKING, Any, ClassVar + from crewai.tools import BaseTool, EnvVar from pydantic import Field -from typing import TYPE_CHECKING, Any, Dict, List -import os if TYPE_CHECKING: from langchain_apify import ApifyActorsTool as _ApifyActorsTool + class ApifyActorsTool(BaseTool): - env_vars: List[EnvVar] = [ - EnvVar(name="APIFY_API_TOKEN", description="API token for Apify platform access", required=True), + env_vars: ClassVar[list[EnvVar]] = [ + EnvVar( + name="APIFY_API_TOKEN", + description="API token for Apify platform access", + required=True, + ), ] """Tool that runs Apify Actors. @@ -40,15 +46,10 @@ class ApifyActorsTool(BaseTool): print(f"URL: {result['metadata']['url']}") print(f"Content: {result.get('markdown', 'N/A')[:100]}...") """ - actor_tool: '_ApifyActorsTool' = Field(description="Apify Actor Tool") - package_dependencies: List[str] = ["langchain-apify"] + actor_tool: "_ApifyActorsTool" = Field(description="Apify Actor Tool") + package_dependencies: ClassVar[list[str]] = ["langchain-apify"] - def __init__( - self, - actor_name: str, - *args: Any, - **kwargs: Any - ) -> None: + def __init__(self, actor_name: str, *args: Any, **kwargs: Any) -> None: if not os.environ.get("APIFY_API_TOKEN"): msg = ( "APIFY_API_TOKEN environment variable is not set. 
" @@ -59,11 +60,11 @@ class ApifyActorsTool(BaseTool): try: from langchain_apify import ApifyActorsTool as _ApifyActorsTool - except ImportError: + except ImportError as e: raise ImportError( "Could not import langchain_apify python package. " "Please install it with `pip install langchain-apify` or `uv add langchain-apify`." - ) + ) from e actor_tool = _ApifyActorsTool(actor_name) kwargs.update( @@ -76,7 +77,7 @@ class ApifyActorsTool(BaseTool): ) super().__init__(*args, **kwargs) - def _run(self, run_input: Dict[str, Any]) -> List[Dict[str, Any]]: + def _run(self, run_input: dict[str, Any]) -> list[dict[str, Any]]: """Run the Actor tool with the given input. Returns: @@ -89,8 +90,8 @@ class ApifyActorsTool(BaseTool): return self.actor_tool._run(run_input) except Exception as e: msg = ( - f'Failed to run ApifyActorsTool {self.name}. ' - 'Please check your Apify account Actor run logs for more details.' - f'Error: {e}' + f"Failed to run ApifyActorsTool {self.name}. " + "Please check your Apify account Actor run logs for more details." + f"Error: {e}" ) raise RuntimeError(msg) from e diff --git a/packages/tools/src/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool.py b/packages/tools/src/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool.py index acd6bbe77..997d92bfd 100644 --- a/packages/tools/src/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool.py +++ b/packages/tools/src/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool.py @@ -1,35 +1,44 @@ +import logging import re import time -import urllib.request -import urllib.parse import urllib.error +import urllib.parse +import urllib.request import xml.etree.ElementTree as ET -from typing import Type, List, Optional, ClassVar -from pydantic import BaseModel, Field -from crewai.tools import BaseTool,EnvVar -import logging from pathlib import Path +from typing import ClassVar + +from crewai.tools import BaseTool, EnvVar +from pydantic import BaseModel, Field logger = logging.getLogger(__file__) + class ArxivToolInput(BaseModel): - search_query: str = Field(..., description="Search query for Arxiv, e.g., 'transformer neural network'") - max_results: int = Field(5, ge=1, le=100, description="Max results to fetch; must be between 1 and 100") + search_query: str = Field( + ..., description="Search query for Arxiv, e.g., 'transformer neural network'" + ) + max_results: int = Field( + 5, ge=1, le=100, description="Max results to fetch; must be between 1 and 100" + ) + class ArxivPaperTool(BaseTool): BASE_API_URL: ClassVar[str] = "http://export.arxiv.org/api/query" SLEEP_DURATION: ClassVar[int] = 1 SUMMARY_TRUNCATE_LENGTH: ClassVar[int] = 300 ATOM_NAMESPACE: ClassVar[str] = "{http://www.w3.org/2005/Atom}" - REQUEST_TIMEOUT: ClassVar[int] = 10 + REQUEST_TIMEOUT: ClassVar[int] = 10 name: str = "Arxiv Paper Fetcher and Downloader" description: str = "Fetches metadata from Arxiv based on a search query and optionally downloads PDFs." 
- args_schema: Type[BaseModel] = ArxivToolInput - model_config = {"extra": "allow"} - package_dependencies: List[str] = ["pydantic"] - env_vars: List[EnvVar] = [] - - def __init__(self, download_pdfs=False, save_dir="./arxiv_pdfs", use_title_as_filename=False): + args_schema: type[BaseModel] = ArxivToolInput + model_config = {"extra": "allow"} + package_dependencies: list[str] = ["pydantic"] + env_vars: list[EnvVar] = [] + + def __init__( + self, download_pdfs=False, save_dir="./arxiv_pdfs", use_title_as_filename=False + ): super().__init__() self.download_pdfs = download_pdfs self.save_dir = save_dir @@ -38,44 +47,49 @@ class ArxivPaperTool(BaseTool): def _run(self, search_query: str, max_results: int = 5) -> str: try: args = ArxivToolInput(search_query=search_query, max_results=max_results) - logger.info(f"Running Arxiv tool: query='{args.search_query}', max_results={args.max_results}, " - f"download_pdfs={self.download_pdfs}, save_dir='{self.save_dir}', " - f"use_title_as_filename={self.use_title_as_filename}") + logger.info( + f"Running Arxiv tool: query='{args.search_query}', max_results={args.max_results}, " + f"download_pdfs={self.download_pdfs}, save_dir='{self.save_dir}', " + f"use_title_as_filename={self.use_title_as_filename}" + ) papers = self.fetch_arxiv_data(args.search_query, args.max_results) if self.download_pdfs: save_dir = self._validate_save_path(self.save_dir) for paper in papers: - if paper['pdf_url']: + if paper["pdf_url"]: if self.use_title_as_filename: - safe_title = re.sub(r'[\\/*?:"<>|]', "_", paper['title']).strip() - filename_base = safe_title or paper['arxiv_id'] + safe_title = re.sub( + r'[\\/*?:"<>|]', "_", paper["title"] + ).strip() + filename_base = safe_title or paper["arxiv_id"] else: - filename_base = paper['arxiv_id'] + filename_base = paper["arxiv_id"] filename = f"{filename_base[:500]}.pdf" save_path = Path(save_dir) / filename - self.download_pdf(paper['pdf_url'], save_path) + self.download_pdf(paper["pdf_url"], save_path) time.sleep(self.SLEEP_DURATION) results = [self._format_paper_result(p) for p in papers] return "\n\n" + "-" * 80 + "\n\n".join(results) except Exception as e: - logger.error(f"ArxivTool Error: {str(e)}") - return f"Failed to fetch or download Arxiv papers: {str(e)}" - + logger.error(f"ArxivTool Error: {e!s}") + return f"Failed to fetch or download Arxiv papers: {e!s}" - def fetch_arxiv_data(self, search_query: str, max_results: int) -> List[dict]: + def fetch_arxiv_data(self, search_query: str, max_results: int) -> list[dict]: api_url = f"{self.BASE_API_URL}?search_query={urllib.parse.quote(search_query)}&start=0&max_results={max_results}" logger.info(f"Fetching data from Arxiv API: {api_url}") try: - with urllib.request.urlopen(api_url, timeout=self.REQUEST_TIMEOUT) as response: + with urllib.request.urlopen( + api_url, timeout=self.REQUEST_TIMEOUT + ) as response: if response.status != 200: raise Exception(f"HTTP {response.status}: {response.reason}") - data = response.read().decode('utf-8') + data = response.read().decode("utf-8") except urllib.error.URLError as e: logger.error(f"Error fetching data from Arxiv: {e}") raise @@ -85,7 +99,7 @@ class ArxivPaperTool(BaseTool): for entry in root.findall(self.ATOM_NAMESPACE + "entry"): raw_id = self._get_element_text(entry, "id") - arxiv_id = raw_id.split('/')[-1].replace('.', '_') if raw_id else "unknown" + arxiv_id = raw_id.split("/")[-1].replace(".", "_") if raw_id else "unknown" title = self._get_element_text(entry, "title") or "No Title" summary = self._get_element_text(entry, 
"summary") or "No Summary" @@ -97,41 +111,48 @@ class ArxivPaperTool(BaseTool): pdf_url = self._extract_pdf_url(entry) - papers.append({ - "arxiv_id": arxiv_id, - "title": title, - "summary": summary, - "authors": authors, - "published_date": published, - "pdf_url": pdf_url - }) + papers.append( + { + "arxiv_id": arxiv_id, + "title": title, + "summary": summary, + "authors": authors, + "published_date": published, + "pdf_url": pdf_url, + } + ) return papers @staticmethod - def _get_element_text(entry: ET.Element, element_name: str) -> Optional[str]: - elem = entry.find(f'{ArxivPaperTool.ATOM_NAMESPACE}{element_name}') + def _get_element_text(entry: ET.Element, element_name: str) -> str | None: + elem = entry.find(f"{ArxivPaperTool.ATOM_NAMESPACE}{element_name}") return elem.text.strip() if elem is not None and elem.text else None - def _extract_pdf_url(self, entry: ET.Element) -> Optional[str]: + def _extract_pdf_url(self, entry: ET.Element) -> str | None: for link in entry.findall(self.ATOM_NAMESPACE + "link"): - if link.attrib.get('title', '').lower() == 'pdf': - return link.attrib.get('href') + if link.attrib.get("title", "").lower() == "pdf": + return link.attrib.get("href") for link in entry.findall(self.ATOM_NAMESPACE + "link"): - href = link.attrib.get('href') - if href and 'pdf' in href: + href = link.attrib.get("href") + if href and "pdf" in href: return href return None def _format_paper_result(self, paper: dict) -> str: - summary = (paper['summary'][:self.SUMMARY_TRUNCATE_LENGTH] + '...') \ - if len(paper['summary']) > self.SUMMARY_TRUNCATE_LENGTH else paper['summary'] - authors_str = ', '.join(paper['authors']) - return (f"Title: {paper['title']}\n" - f"Authors: {authors_str}\n" - f"Published: {paper['published_date']}\n" - f"PDF: {paper['pdf_url'] or 'N/A'}\n" - f"Summary: {summary}") + summary = ( + (paper["summary"][: self.SUMMARY_TRUNCATE_LENGTH] + "...") + if len(paper["summary"]) > self.SUMMARY_TRUNCATE_LENGTH + else paper["summary"] + ) + authors_str = ", ".join(paper["authors"]) + return ( + f"Title: {paper['title']}\n" + f"Authors: {authors_str}\n" + f"Published: {paper['published_date']}\n" + f"PDF: {paper['pdf_url'] or 'N/A'}\n" + f"Summary: {summary}" + ) @staticmethod def _validate_save_path(path: str) -> Path: diff --git a/packages/tools/src/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool_test.py b/packages/tools/src/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool_test.py index 4f8747d2f..781890802 100644 --- a/packages/tools/src/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool_test.py +++ b/packages/tools/src/crewai_tools/tools/arxiv_paper_tool/arxiv_paper_tool_test.py @@ -1,16 +1,19 @@ -import pytest import urllib.error -from unittest.mock import patch, MagicMock, mock_open -from pathlib import Path import xml.etree.ElementTree as ET +from pathlib import Path +from unittest.mock import MagicMock, patch + +import pytest from crewai_tools import ArxivPaperTool + @pytest.fixture def tool(): return ArxivPaperTool(download_pdfs=False) + def mock_arxiv_response(): - return ''' + return """ http://arxiv.org/abs/1234.5678 @@ -20,7 +23,8 @@ def mock_arxiv_response(): John Doe - ''' + """ + @patch("urllib.request.urlopen") def test_fetch_arxiv_data(mock_urlopen, tool): @@ -31,24 +35,30 @@ def test_fetch_arxiv_data(mock_urlopen, tool): results = tool.fetch_arxiv_data("transformer", 1) assert isinstance(results, list) - assert results[0]['title'] == "Sample Paper" + assert results[0]["title"] == "Sample Paper" + @patch("urllib.request.urlopen", 
side_effect=urllib.error.URLError("Timeout")) def test_fetch_arxiv_data_network_error(mock_urlopen, tool): with pytest.raises(urllib.error.URLError): tool.fetch_arxiv_data("transformer", 1) + @patch("urllib.request.urlretrieve") def test_download_pdf_success(mock_urlretrieve): tool = ArxivPaperTool() tool.download_pdf("http://arxiv.org/pdf/1234.5678.pdf", Path("test.pdf")) mock_urlretrieve.assert_called_once() + @patch("urllib.request.urlretrieve", side_effect=OSError("Permission denied")) def test_download_pdf_oserror(mock_urlretrieve): tool = ArxivPaperTool() with pytest.raises(OSError): - tool.download_pdf("http://arxiv.org/pdf/1234.5678.pdf", Path("/restricted/test.pdf")) + tool.download_pdf( + "http://arxiv.org/pdf/1234.5678.pdf", Path("/restricted/test.pdf") + ) + @patch("urllib.request.urlopen") @patch("urllib.request.urlretrieve") @@ -63,6 +73,7 @@ def test_run_with_download(mock_urlretrieve, mock_urlopen): assert "Title: Sample Paper" in output mock_urlretrieve.assert_called_once() + @patch("urllib.request.urlopen") def test_run_no_download(mock_urlopen): mock_response = MagicMock() @@ -74,12 +85,14 @@ def test_run_no_download(mock_urlopen): result = tool._run("transformer", 1) assert "Title: Sample Paper" in result + @patch("pathlib.Path.mkdir") def test_validate_save_path_creates_directory(mock_mkdir): path = ArxivPaperTool._validate_save_path("new_folder") mock_mkdir.assert_called_once_with(parents=True, exist_ok=True) assert isinstance(path, Path) + @patch("urllib.request.urlopen") def test_run_handles_exception(mock_urlopen): mock_urlopen.side_effect = Exception("API failure") @@ -98,16 +111,20 @@ def test_invalid_xml_response(mock_urlopen, tool): with pytest.raises(ET.ParseError): tool.fetch_arxiv_data("quantum", 1) + @patch.object(ArxivPaperTool, "fetch_arxiv_data") def test_run_with_max_results(mock_fetch, tool): - mock_fetch.return_value = [{ - "arxiv_id": f"test_{i}", - "title": f"Title {i}", - "summary": "Summary", - "authors": ["Author"], - "published_date": "2023-01-01", - "pdf_url": None - } for i in range(100)] + mock_fetch.return_value = [ + { + "arxiv_id": f"test_{i}", + "title": f"Title {i}", + "summary": "Summary", + "authors": ["Author"], + "published_date": "2023-01-01", + "pdf_url": None, + } + for i in range(100) + ] result = tool._run(search_query="test", max_results=100) assert result.count("Title:") == 100 diff --git a/packages/tools/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py b/packages/tools/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py index 1f96d452a..a7a9f4b98 100644 --- a/packages/tools/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/brave_search_tool/brave_search_tool.py @@ -1,7 +1,7 @@ import datetime import os import time -from typing import Any, ClassVar, List, Optional, Type +from typing import Any, ClassVar import requests from crewai.tools import BaseTool, EnvVar @@ -41,15 +41,17 @@ class BraveSearchTool(BaseTool): description: str = ( "A tool that can be used to search the internet with a search_query." 
) - args_schema: Type[BaseModel] = BraveSearchToolSchema + args_schema: type[BaseModel] = BraveSearchToolSchema search_url: str = "https://api.search.brave.com/res/v1/web/search" - country: Optional[str] = "" + country: str | None = "" n_results: int = 10 save_file: bool = False _last_request_time: ClassVar[float] = 0 _min_request_interval: ClassVar[float] = 1.0 # seconds - env_vars: List[EnvVar] = [ - EnvVar(name="BRAVE_API_KEY", description="API key for Brave Search", required=True), + env_vars: ClassVar[list[EnvVar]] = [ + EnvVar( + name="BRAVE_API_KEY", description="API key for Brave Search", required=True + ), ] def __init__(self, *args, **kwargs): @@ -87,7 +89,9 @@ class BraveSearchTool(BaseTool): "Accept": "application/json", } - response = requests.get(self.search_url, headers=headers, params=payload) + response = requests.get( + self.search_url, headers=headers, params=payload, timeout=30 + ) response.raise_for_status() # Handle non-200 responses results = response.json() @@ -111,11 +115,10 @@ class BraveSearchTool(BaseTool): content = "\n".join(string) except requests.RequestException as e: - return f"Error performing search: {str(e)}" + return f"Error performing search: {e!s}" except KeyError as e: - return f"Error parsing search results: {str(e)}" + return f"Error parsing search results: {e!s}" if save_file: _save_results_to_file(content) return f"\nSearch results: {content}\n" - else: - return content + return content diff --git a/packages/tools/src/crewai_tools/tools/brightdata_tool/__init__.py b/packages/tools/src/crewai_tools/tools/brightdata_tool/__init__.py index 0842e97ea..7eb190883 100644 --- a/packages/tools/src/crewai_tools/tools/brightdata_tool/__init__.py +++ b/packages/tools/src/crewai_tools/tools/brightdata_tool/__init__.py @@ -2,8 +2,4 @@ from .brightdata_dataset import BrightDataDatasetTool from .brightdata_serp import BrightDataSearchTool from .brightdata_unlocker import BrightDataWebUnlockerTool -__all__ = [ - "BrightDataDatasetTool", - "BrightDataSearchTool", - "BrightDataWebUnlockerTool" -] \ No newline at end of file +__all__ = ["BrightDataDatasetTool", "BrightDataSearchTool", "BrightDataWebUnlockerTool"] diff --git a/packages/tools/src/crewai_tools/tools/brightdata_tool/brightdata_dataset.py b/packages/tools/src/crewai_tools/tools/brightdata_tool/brightdata_dataset.py index 88ca65077..644ebc0d5 100644 --- a/packages/tools/src/crewai_tools/tools/brightdata_tool/brightdata_dataset.py +++ b/packages/tools/src/crewai_tools/tools/brightdata_tool/brightdata_dataset.py @@ -1,11 +1,12 @@ import asyncio import os -from typing import Any, Dict, Optional, Type +from typing import Any import aiohttp from crewai.tools import BaseTool from pydantic import BaseModel, Field + class BrightDataConfig(BaseModel): API_URL: str = "https://api.brightdata.com" DEFAULT_TIMEOUT: int = 600 @@ -16,8 +17,12 @@ class BrightDataConfig(BaseModel): return cls( API_URL=os.environ.get("BRIGHTDATA_API_URL", "https://api.brightdata.com"), DEFAULT_TIMEOUT=int(os.environ.get("BRIGHTDATA_DEFAULT_TIMEOUT", "600")), - DEFAULT_POLLING_INTERVAL=int(os.environ.get("BRIGHTDATA_DEFAULT_POLLING_INTERVAL", "1")) + DEFAULT_POLLING_INTERVAL=int( + os.environ.get("BRIGHTDATA_DEFAULT_POLLING_INTERVAL", "1") + ), ) + + class BrightDataDatasetToolException(Exception): """Exception raised for custom error in the application.""" @@ -43,15 +48,16 @@ class BrightDataDatasetToolSchema(BaseModel): """ dataset_type: str = Field(..., description="The Bright Data Dataset Type") - format: Optional[str] = Field( + 
format: str | None = Field( default="json", description="Response format (json by default)" ) url: str = Field(..., description="The URL to extract data from") - zipcode: Optional[str] = Field(default=None, description="Optional zipcode") - additional_params: Optional[Dict[str, Any]] = Field( + zipcode: str | None = Field(default=None, description="Optional zipcode") + additional_params: dict[str, Any] | None = Field( default=None, description="Additional params if any" ) + config = BrightDataConfig.from_env() BRIGHTDATA_API_URL = config.API_URL @@ -404,14 +410,21 @@ class BrightDataDatasetTool(BaseTool): name: str = "Bright Data Dataset Tool" description: str = "Scrapes structured data using Bright Data Dataset API from a URL and optional input parameters" - args_schema: Type[BaseModel] = BrightDataDatasetToolSchema - dataset_type: Optional[str] = None - url: Optional[str] = None + args_schema: type[BaseModel] = BrightDataDatasetToolSchema + dataset_type: str | None = None + url: str | None = None format: str = "json" - zipcode: Optional[str] = None - additional_params: Optional[Dict[str, Any]] = None + zipcode: str | None = None + additional_params: dict[str, Any] | None = None - def __init__(self, dataset_type: str = None, url: str = None, format: str = "json", zipcode: str = None, additional_params: Dict[str, Any] = None): + def __init__( + self, + dataset_type: str | None = None, + url: str | None = None, + format: str = "json", + zipcode: str | None = None, + additional_params: dict[str, Any] | None = None, + ): super().__init__() self.dataset_type = dataset_type self.url = url @@ -427,10 +440,10 @@ class BrightDataDatasetTool(BaseTool): dataset_type: str, output_format: str, url: str, - zipcode: Optional[str] = None, - additional_params: Optional[Dict[str, Any]] = None, + zipcode: str | None = None, + additional_params: dict[str, Any] | None = None, polling_interval: int = 1, - ) -> Dict: + ) -> dict: """ Asynchronously trigger and poll Bright Data dataset scraping. @@ -509,7 +522,7 @@ class BrightDataDatasetTool(BaseTool): if status_data.get("status") == "ready": print("Job is ready") break - elif status_data.get("status") == "error": + if status_data.get("status") == "error": raise BrightDataDatasetToolException( f"Job failed: {status_data}", 0 ) @@ -530,7 +543,15 @@ class BrightDataDatasetTool(BaseTool): return await snapshot_response.text() - def _run(self, url: str = None, dataset_type: str = None, format: str = None, zipcode: str = None, additional_params: Dict[str, Any] = None, **kwargs: Any) -> Any: + def _run( + self, + url: str | None = None, + dataset_type: str | None = None, + format: str | None = None, + zipcode: str | None = None, + additional_params: dict[str, Any] | None = None, + **kwargs: Any, + ) -> Any: dataset_type = dataset_type or self.dataset_type output_format = format or self.format url = url or self.url @@ -538,7 +559,9 @@ class BrightDataDatasetTool(BaseTool): additional_params = additional_params or self.additional_params if not dataset_type: - raise ValueError("dataset_type is required either in constructor or method call") + raise ValueError( + "dataset_type is required either in constructor or method call" + ) if not url: raise ValueError("url is required either in constructor or method call") @@ -563,8 +586,10 @@ class BrightDataDatasetTool(BaseTool): ) ) except TimeoutError as e: - return f"Timeout Exception occured in method : get_dataset_data_async. Details - {str(e)}" + return f"Timeout Exception occured in method : get_dataset_data_async. 
Details - {e!s}" except BrightDataDatasetToolException as e: - return f"Exception occured in method : get_dataset_data_async. Details - {str(e)}" + return ( + f"Exception occured in method : get_dataset_data_async. Details - {e!s}" + ) except Exception as e: - return f"Bright Data API error: {str(e)}" + return f"Bright Data API error: {e!s}" diff --git a/packages/tools/src/crewai_tools/tools/brightdata_tool/brightdata_serp.py b/packages/tools/src/crewai_tools/tools/brightdata_tool/brightdata_serp.py index ae197ce0f..d2ee13b5c 100644 --- a/packages/tools/src/crewai_tools/tools/brightdata_tool/brightdata_serp.py +++ b/packages/tools/src/crewai_tools/tools/brightdata_tool/brightdata_serp.py @@ -1,20 +1,24 @@ import os import urllib.parse -from typing import Any, Optional, Type +from typing import Any import requests from crewai.tools import BaseTool from pydantic import BaseModel, Field + class BrightDataConfig(BaseModel): API_URL: str = "https://api.brightdata.com/request" @classmethod def from_env(cls): return cls( - API_URL=os.environ.get("BRIGHTDATA_API_URL", "https://api.brightdata.com/request") + API_URL=os.environ.get( + "BRIGHTDATA_API_URL", "https://api.brightdata.com/request" + ) ) + class BrightDataSearchToolSchema(BaseModel): """ Schema that defines the input arguments for the BrightDataSearchToolSchema. @@ -30,27 +34,27 @@ class BrightDataSearchToolSchema(BaseModel): """ query: str = Field(..., description="Search query to perform") - search_engine: Optional[str] = Field( + search_engine: str | None = Field( default="google", description="Search engine domain (e.g., 'google', 'bing', 'yandex')", ) - country: Optional[str] = Field( + country: str | None = Field( default="us", description="Two-letter country code for geo-targeting (e.g., 'us', 'gb')", ) - language: Optional[str] = Field( + language: str | None = Field( default="en", description="Language code (e.g., 'en', 'es') used in the query URL", ) - search_type: Optional[str] = Field( + search_type: str | None = Field( default=None, description="Type of search (e.g., 'isch' for images, 'nws' for news)", ) - device_type: Optional[str] = Field( + device_type: str | None = Field( default="desktop", description="Device type to simulate (e.g., 'mobile', 'desktop', 'ios')", ) - parse_results: Optional[bool] = Field( + parse_results: bool | None = Field( default=True, description="Whether to parse and return JSON (True) or raw HTML/text (False)", ) @@ -75,20 +79,29 @@ class BrightDataSearchTool(BaseTool): name: str = "Bright Data SERP Search" description: str = "Tool to perform web search using Bright Data SERP API." 
- args_schema: Type[BaseModel] = BrightDataSearchToolSchema + args_schema: type[BaseModel] = BrightDataSearchToolSchema _config = BrightDataConfig.from_env() base_url: str = "" api_key: str = "" zone: str = "" - query: Optional[str] = None + query: str | None = None search_engine: str = "google" country: str = "us" language: str = "en" - search_type: Optional[str] = None + search_type: str | None = None device_type: str = "desktop" parse_results: bool = True - def __init__(self, query: str = None, search_engine: str = "google", country: str = "us", language: str = "en", search_type: str = None, device_type: str = "desktop", parse_results: bool = True): + def __init__( + self, + query: str | None = None, + search_engine: str = "google", + country: str = "us", + language: str = "en", + search_type: str | None = None, + device_type: str = "desktop", + parse_results: bool = True, + ): super().__init__() self.base_url = self._config.API_URL self.query = query @@ -109,11 +122,21 @@ class BrightDataSearchTool(BaseTool): def get_search_url(self, engine: str, query: str): if engine == "yandex": return f"https://yandex.com/search/?text=${query}" - elif engine == "bing": + if engine == "bing": return f"https://www.bing.com/search?q=${query}" return f"https://www.google.com/search?q=${query}" - def _run(self, query: str = None, search_engine: str = None, country: str = None, language: str = None, search_type: str = None, device_type: str = None, parse_results: bool = None, **kwargs) -> Any: + def _run( + self, + query: str | None = None, + search_engine: str | None = None, + country: str | None = None, + language: str | None = None, + search_type: str | None = None, + device_type: str | None = None, + parse_results: bool | None = None, + **kwargs, + ) -> Any: """ Executes a search query using Bright Data SERP API and returns results. 
@@ -137,7 +160,9 @@ class BrightDataSearchTool(BaseTool): language = language or self.language search_type = search_type or self.search_type device_type = device_type or self.device_type - parse_results = parse_results if parse_results is not None else self.parse_results + parse_results = ( + parse_results if parse_results is not None else self.parse_results + ) results_count = kwargs.get("results_count", "10") # Validate required parameters @@ -161,7 +186,7 @@ class BrightDataSearchTool(BaseTool): params.append(f"num={results_count}") if parse_results: - params.append(f"brd_json=1") + params.append("brd_json=1") if search_type: if search_type == "jobs": @@ -202,6 +227,6 @@ class BrightDataSearchTool(BaseTool): return response.text except requests.RequestException as e: - return f"Error performing BrightData search: {str(e)}" + return f"Error performing BrightData search: {e!s}" except Exception as e: - return f"Error fetching results: {str(e)}" + return f"Error fetching results: {e!s}" diff --git a/packages/tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py b/packages/tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py index 27864cb97..068c4dbb7 100644 --- a/packages/tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py +++ b/packages/tools/src/crewai_tools/tools/brightdata_tool/brightdata_unlocker.py @@ -1,19 +1,23 @@ import os -from typing import Any, Optional, Type +from typing import Any import requests from crewai.tools import BaseTool from pydantic import BaseModel, Field + class BrightDataConfig(BaseModel): API_URL: str = "https://api.brightdata.com/request" @classmethod def from_env(cls): return cls( - API_URL=os.environ.get("BRIGHTDATA_API_URL", "https://api.brightdata.com/request") + API_URL=os.environ.get( + "BRIGHTDATA_API_URL", "https://api.brightdata.com/request" + ) ) + class BrightDataUnlockerToolSchema(BaseModel): """ Pydantic schema for input parameters used by the BrightDataWebUnlockerTool. 
@@ -28,10 +32,10 @@ class BrightDataUnlockerToolSchema(BaseModel): """ url: str = Field(..., description="URL to perform the web scraping") - format: Optional[str] = Field( + format: str | None = Field( default="raw", description="Response format (raw is standard)" ) - data_format: Optional[str] = Field( + data_format: str | None = Field( default="markdown", description="Response data format (html by default)" ) @@ -59,16 +63,18 @@ class BrightDataWebUnlockerTool(BaseTool): name: str = "Bright Data Web Unlocker Scraping" description: str = "Tool to perform web scraping using Bright Data Web Unlocker" - args_schema: Type[BaseModel] = BrightDataUnlockerToolSchema + args_schema: type[BaseModel] = BrightDataUnlockerToolSchema _config = BrightDataConfig.from_env() base_url: str = "" api_key: str = "" zone: str = "" - url: Optional[str] = None + url: str | None = None format: str = "raw" data_format: str = "markdown" - def __init__(self, url: str = None, format: str = "raw", data_format: str = "markdown"): + def __init__( + self, url: str | None = None, format: str = "raw", data_format: str = "markdown" + ): super().__init__() self.base_url = self._config.API_URL self.url = url @@ -82,7 +88,13 @@ class BrightDataWebUnlockerTool(BaseTool): if not self.zone: raise ValueError("BRIGHT_DATA_ZONE environment variable is required.") - def _run(self, url: str = None, format: str = None, data_format: str = None, **kwargs: Any) -> Any: + def _run( + self, + url: str | None = None, + format: str | None = None, + data_format: str | None = None, + **kwargs: Any, + ) -> Any: url = url or self.url format = format or self.format data_format = data_format or self.data_format @@ -119,4 +131,4 @@ class BrightDataWebUnlockerTool(BaseTool): except requests.RequestException as e: return f"HTTP Error performing BrightData Web Unlocker Scrape: {e}\nResponse: {getattr(e.response, 'text', '')}" except Exception as e: - return f"Error fetching results: {str(e)}" + return f"Error fetching results: {e!s}" diff --git a/packages/tools/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py b/packages/tools/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py index b6b3612dc..f26b10dcd 100644 --- a/packages/tools/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py +++ b/packages/tools/src/crewai_tools/tools/browserbase_load_tool/browserbase_load_tool.py @@ -1,5 +1,5 @@ import os -from typing import Any, Optional, Type, List +from typing import Any from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, Field @@ -12,26 +12,34 @@ class BrowserbaseLoadToolSchema(BaseModel): class BrowserbaseLoadTool(BaseTool): name: str = "Browserbase web load tool" description: str = "Load webpages url in a headless browser using Browserbase and return the contents" - args_schema: Type[BaseModel] = BrowserbaseLoadToolSchema - api_key: Optional[str] = os.getenv("BROWSERBASE_API_KEY") - project_id: Optional[str] = os.getenv("BROWSERBASE_PROJECT_ID") - text_content: Optional[bool] = False - session_id: Optional[str] = None - proxy: Optional[bool] = None - browserbase: Optional[Any] = None - package_dependencies: List[str] = ["browserbase"] - env_vars: List[EnvVar] = [ - EnvVar(name="BROWSERBASE_API_KEY", description="API key for Browserbase services", required=False), - EnvVar(name="BROWSERBASE_PROJECT_ID", description="Project ID for Browserbase services", required=False), + args_schema: type[BaseModel] = BrowserbaseLoadToolSchema + api_key: str | None = 
os.getenv("BROWSERBASE_API_KEY") + project_id: str | None = os.getenv("BROWSERBASE_PROJECT_ID") + text_content: bool | None = False + session_id: str | None = None + proxy: bool | None = None + browserbase: Any | None = None + package_dependencies: list[str] = ["browserbase"] + env_vars: list[EnvVar] = [ + EnvVar( + name="BROWSERBASE_API_KEY", + description="API key for Browserbase services", + required=False, + ), + EnvVar( + name="BROWSERBASE_PROJECT_ID", + description="Project ID for Browserbase services", + required=False, + ), ] def __init__( self, - api_key: Optional[str] = None, - project_id: Optional[str] = None, - text_content: Optional[bool] = False, - session_id: Optional[str] = None, - proxy: Optional[bool] = None, + api_key: str | None = None, + project_id: str | None = None, + text_content: bool | None = False, + session_id: str | None = None, + proxy: bool | None = None, **kwargs, ): super().__init__(**kwargs) diff --git a/packages/tools/src/crewai_tools/tools/code_docs_search_tool/code_docs_search_tool.py b/packages/tools/src/crewai_tools/tools/code_docs_search_tool/code_docs_search_tool.py index 155b4390d..35dddcebd 100644 --- a/packages/tools/src/crewai_tools/tools/code_docs_search_tool/code_docs_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/code_docs_search_tool/code_docs_search_tool.py @@ -1,11 +1,4 @@ -from typing import Any, Optional, Type - -try: - from embedchain.models.data_type import DataType - EMBEDCHAIN_AVAILABLE = True -except ImportError: - EMBEDCHAIN_AVAILABLE = False - +from crewai_tools.rag.data_types import DataType from pydantic import BaseModel, Field from ..rag.rag_tool import RagTool @@ -31,9 +24,9 @@ class CodeDocsSearchTool(RagTool): description: str = ( "A tool that can be used to semantic search a query from a Code Docs content." ) - args_schema: Type[BaseModel] = CodeDocsSearchToolSchema + args_schema: type[BaseModel] = CodeDocsSearchToolSchema - def __init__(self, docs_url: Optional[str] = None, **kwargs): + def __init__(self, docs_url: str | None = None, **kwargs): super().__init__(**kwargs) if docs_url is not None: self.add(docs_url) @@ -42,15 +35,17 @@ class CodeDocsSearchTool(RagTool): self._generate_description() def add(self, docs_url: str) -> None: - if not EMBEDCHAIN_AVAILABLE: - raise ImportError("embedchain is not installed. Please install it with `pip install crewai-tools[embedchain]`") super().add(docs_url, data_type=DataType.DOCS_SITE) def _run( self, search_query: str, - docs_url: Optional[str] = None, + docs_url: str | None = None, + similarity_threshold: float | None = None, + limit: int | None = None, ) -> str: if docs_url is not None: self.add(docs_url) - return super()._run(query=search_query) + return super()._run( + query=search_query, similarity_threshold=similarity_threshold, limit=limit + ) diff --git a/packages/tools/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py b/packages/tools/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py index 95559f2a7..dc348d3b3 100644 --- a/packages/tools/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py +++ b/packages/tools/src/crewai_tools/tools/code_interpreter_tool/code_interpreter_tool.py @@ -8,17 +8,16 @@ potentially unsafe operations and importing restricted modules. 
import importlib.util import os from types import ModuleType -from typing import Any, Dict, List, Optional, Type +from typing import Any from crewai.tools import BaseTool +from crewai_tools.printer import Printer from docker import DockerClient from docker import from_env as docker_from_env from docker.errors import ImageNotFound, NotFound from docker.models.containers import Container from pydantic import BaseModel, Field -from crewai_tools.printer import Printer - class CodeInterpreterSchema(BaseModel): """Schema for defining inputs to the CodeInterpreterTool. @@ -32,7 +31,7 @@ class CodeInterpreterSchema(BaseModel): description="Python3 code used to be interpreted in the Docker container. ALWAYS PRINT the final result and the output of the code", ) - libraries_used: List[str] = Field( + libraries_used: list[str] = Field( ..., description="List of libraries used in the code with proper installing names separated by commas. Example: numpy,pandas,beautifulsoup4", ) @@ -74,9 +73,9 @@ class SandboxPython: @staticmethod def restricted_import( name: str, - custom_globals: Optional[Dict[str, Any]] = None, - custom_locals: Optional[Dict[str, Any]] = None, - fromlist: Optional[List[str]] = None, + custom_globals: dict[str, Any] | None = None, + custom_locals: dict[str, Any] | None = None, + fromlist: list[str] | None = None, level: int = 0, ) -> ModuleType: """A restricted import function that blocks importing of unsafe modules. @@ -99,7 +98,7 @@ class SandboxPython: return __import__(name, custom_globals, custom_locals, fromlist or (), level) @staticmethod - def safe_builtins() -> Dict[str, Any]: + def safe_builtins() -> dict[str, Any]: """Creates a dictionary of built-in functions with unsafe ones removed. Returns: @@ -116,7 +115,7 @@ class SandboxPython: return safe_builtins @staticmethod - def exec(code: str, locals: Dict[str, Any]) -> None: + def exec(code: str, locals: dict[str, Any]) -> None: """Executes Python code in a restricted environment. Args: @@ -136,11 +135,11 @@ class CodeInterpreterTool(BaseTool): name: str = "Code Interpreter" description: str = "Interprets Python3 code strings with a final print statement." - args_schema: Type[BaseModel] = CodeInterpreterSchema + args_schema: type[BaseModel] = CodeInterpreterSchema default_image_tag: str = "code-interpreter:latest" - code: Optional[str] = None - user_dockerfile_path: Optional[str] = None - user_docker_base_url: Optional[str] = None + code: str | None = None + user_dockerfile_path: str | None = None + user_docker_base_url: str | None = None unsafe_mode: bool = False @staticmethod @@ -205,10 +204,9 @@ class CodeInterpreterTool(BaseTool): if self.unsafe_mode: return self.run_code_unsafe(code, libraries_used) - else: - return self.run_code_safety(code, libraries_used) + return self.run_code_safety(code, libraries_used) - def _install_libraries(self, container: Container, libraries: List[str]) -> None: + def _install_libraries(self, container: Container, libraries: list[str]) -> None: """Installs required Python libraries in the Docker container. Args: @@ -278,7 +276,7 @@ class CodeInterpreterTool(BaseTool): Printer.print("Docker is not installed", color="bold_purple") return False - def run_code_safety(self, code: str, libraries_used: List[str]) -> str: + def run_code_safety(self, code: str, libraries_used: list[str]) -> str: """Runs code in the safest available environment. 
Attempts to run code in Docker if available, falls back to a restricted @@ -293,10 +291,9 @@ class CodeInterpreterTool(BaseTool): """ if self._check_docker_available(): return self.run_code_in_docker(code, libraries_used) - else: - return self.run_code_in_restricted_sandbox(code) + return self.run_code_in_restricted_sandbox(code) - def run_code_in_docker(self, code: str, libraries_used: List[str]) -> str: + def run_code_in_docker(self, code: str, libraries_used: list[str]) -> str: """Runs Python code in a Docker container for safe isolation. Creates a Docker container, installs the required libraries, executes the code, @@ -342,9 +339,9 @@ class CodeInterpreterTool(BaseTool): SandboxPython.exec(code=code, locals=exec_locals) return exec_locals.get("result", "No result variable found.") except Exception as e: - return f"An error occurred: {str(e)}" + return f"An error occurred: {e!s}" - def run_code_unsafe(self, code: str, libraries_used: List[str]) -> str: + def run_code_unsafe(self, code: str, libraries_used: list[str]) -> str: """Runs code directly on the host machine without any safety restrictions. WARNING: This mode is unsafe and should only be used in trusted environments @@ -370,4 +367,4 @@ class CodeInterpreterTool(BaseTool): exec(code, {}, exec_locals) return exec_locals.get("result", "No result variable found.") except Exception as e: - return f"An error occurred: {str(e)}" + return f"An error occurred: {e!s}" diff --git a/packages/tools/src/crewai_tools/tools/composio_tool/composio_tool.py b/packages/tools/src/crewai_tools/tools/composio_tool/composio_tool.py index 019b7895c..4e587ba91 100644 --- a/packages/tools/src/crewai_tools/tools/composio_tool/composio_tool.py +++ b/packages/tools/src/crewai_tools/tools/composio_tool/composio_tool.py @@ -12,8 +12,12 @@ class ComposioTool(BaseTool): """Wrapper for composio tools.""" composio_action: t.Callable - env_vars: t.List[EnvVar] = [ - EnvVar(name="COMPOSIO_API_KEY", description="API key for Composio services", required=True), + env_vars: list[EnvVar] = [ + EnvVar( + name="COMPOSIO_API_KEY", + description="API key for Composio services", + required=True, + ), ] def _run(self, *args: t.Any, **kwargs: t.Any) -> t.Any: @@ -31,7 +35,7 @@ class ComposioTool(BaseTool): return connections = t.cast( - t.List[ConnectedAccountModel], + list[ConnectedAccountModel], toolset.client.connected_accounts.get(), ) if tool.app not in [connection.appUniqueId for connection in connections]: @@ -66,7 +70,7 @@ class ComposioTool(BaseTool): schema = action_schema.model_dump(exclude_none=True) entity_id = kwargs.pop("entity_id", DEFAULT_ENTITY_ID) - def function(**kwargs: t.Any) -> t.Dict: + def function(**kwargs: t.Any) -> dict: """Wrapper function for composio action.""" return toolset.execute_action( action=Action(schema["name"]), @@ -93,10 +97,10 @@ class ComposioTool(BaseTool): def from_app( cls, *apps: t.Any, - tags: t.Optional[t.List[str]] = None, - use_case: t.Optional[str] = None, + tags: list[str] | None = None, + use_case: str | None = None, **kwargs: t.Any, - ) -> t.List[te.Self]: + ) -> list[te.Self]: """Create toolset from an app.""" if len(apps) == 0: raise ValueError("You need to provide at least one app name") diff --git a/packages/tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py b/packages/tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py index 7c531273e..28eb987f6 100644 --- 
a/packages/tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py +++ b/packages/tools/src/crewai_tools/tools/contextualai_create_agent_tool/contextual_create_agent_tool.py @@ -1,32 +1,36 @@ -from typing import Any, Optional, Type, List +from typing import Any + from crewai.tools import BaseTool from pydantic import BaseModel, Field -import os class ContextualAICreateAgentSchema(BaseModel): """Schema for contextual create agent tool.""" + agent_name: str = Field(..., description="Name for the new agent") agent_description: str = Field(..., description="Description for the new agent") datastore_name: str = Field(..., description="Name for the new datastore") - document_paths: List[str] = Field(..., description="List of file paths to upload") + document_paths: list[str] = Field(..., description="List of file paths to upload") class ContextualAICreateAgentTool(BaseTool): """Tool to create Contextual AI RAG agents with documents.""" - + name: str = "Contextual AI Create Agent Tool" - description: str = "Create a new Contextual AI RAG agent with documents and datastore" - args_schema: Type[BaseModel] = ContextualAICreateAgentSchema - + description: str = ( + "Create a new Contextual AI RAG agent with documents and datastore" + ) + args_schema: type[BaseModel] = ContextualAICreateAgentSchema + api_key: str contextual_client: Any = None - package_dependencies: List[str] = ["contextual-client"] + package_dependencies: list[str] = ["contextual-client"] def __init__(self, **kwargs): super().__init__(**kwargs) try: from contextual import ContextualAI + self.contextual_client = ContextualAI(api_key=self.api_key) except ImportError: raise ImportError( @@ -38,34 +42,38 @@ class ContextualAICreateAgentTool(BaseTool): agent_name: str, agent_description: str, datastore_name: str, - document_paths: List[str] + document_paths: list[str], ) -> str: """Create a complete RAG pipeline with documents.""" try: import os - + # Create datastore datastore = self.contextual_client.datastores.create(name=datastore_name) datastore_id = datastore.id - + # Upload documents document_ids = [] for doc_path in document_paths: if not os.path.exists(doc_path): raise FileNotFoundError(f"Document not found: {doc_path}") - - with open(doc_path, 'rb') as f: - ingestion_result = self.contextual_client.datastores.documents.ingest(datastore_id, file=f) + + with open(doc_path, "rb") as f: + ingestion_result = ( + self.contextual_client.datastores.documents.ingest( + datastore_id, file=f + ) + ) document_ids.append(ingestion_result.id) - + # Create agent agent = self.contextual_client.agents.create( name=agent_name, description=agent_description, - datastore_ids=[datastore_id] + datastore_ids=[datastore_id], ) - + return f"Successfully created agent '{agent_name}' with ID: {agent.id} and datastore ID: {datastore_id}. Uploaded {len(document_ids)} documents." 
- + except Exception as e: - return f"Failed to create agent with documents: {str(e)}" + return f"Failed to create agent with documents: {e!s}" diff --git a/packages/tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py b/packages/tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py index 5985b60f1..68eb17e56 100644 --- a/packages/tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py +++ b/packages/tools/src/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py @@ -1,51 +1,62 @@ -from typing import Any, Optional, Type, List from crewai.tools import BaseTool from pydantic import BaseModel, Field class ContextualAIParseSchema(BaseModel): """Schema for contextual parse tool.""" + file_path: str = Field(..., description="Path to the document to parse") parse_mode: str = Field(default="standard", description="Parsing mode") - figure_caption_mode: str = Field(default="concise", description="Figure caption mode") - enable_document_hierarchy: bool = Field(default=True, description="Enable document hierarchy") - page_range: Optional[str] = Field(default=None, description="Page range to parse (e.g., '0-5')") - output_types: List[str] = Field(default=["markdown-per-page"], description="List of output types") + figure_caption_mode: str = Field( + default="concise", description="Figure caption mode" + ) + enable_document_hierarchy: bool = Field( + default=True, description="Enable document hierarchy" + ) + page_range: str | None = Field( + default=None, description="Page range to parse (e.g., '0-5')" + ) + output_types: list[str] = Field( + default=["markdown-per-page"], description="List of output types" + ) class ContextualAIParseTool(BaseTool): """Tool to parse documents using Contextual AI's parser.""" - + name: str = "Contextual AI Document Parser" description: str = "Parse documents using Contextual AI's advanced document parser" - args_schema: Type[BaseModel] = ContextualAIParseSchema - + args_schema: type[BaseModel] = ContextualAIParseSchema + api_key: str - package_dependencies: List[str] = ["contextual-client"] + package_dependencies: list[str] = ["contextual-client"] def _run( - self, - file_path: str, + self, + file_path: str, parse_mode: str = "standard", figure_caption_mode: str = "concise", enable_document_hierarchy: bool = True, - page_range: Optional[str] = None, - output_types: List[str] = ["markdown-per-page"] + page_range: str | None = None, + output_types: list[str] | None = None, ) -> str: """Parse a document using Contextual AI's parser.""" + if output_types is None: + output_types = ["markdown-per-page"] try: - import requests import json import os from time import sleep + import requests + if not os.path.exists(file_path): raise FileNotFoundError(f"Document not found: {file_path}") base_url = "https://api.contextual.ai/v1" headers = { "accept": "application/json", - "authorization": f"Bearer {self.api_key}" + "authorization": f"Bearer {self.api_key}", } # Submit parse job @@ -63,17 +74,17 @@ class ContextualAIParseTool(BaseTool): file = {"raw_file": fp} result = requests.post(url, headers=headers, data=config, files=file) response = json.loads(result.text) - job_id = response['job_id'] + job_id = response["job_id"] # Monitor job status status_url = f"{base_url}/parse/jobs/{job_id}/status" while True: result = requests.get(status_url, headers=headers) - parse_response = json.loads(result.text)['status'] + parse_response = json.loads(result.text)["status"] if parse_response == "completed": 
break - elif parse_response == "failed": + if parse_response == "failed": raise RuntimeError("Document parsing failed") sleep(5) @@ -89,4 +100,4 @@ class ContextualAIParseTool(BaseTool): return json.dumps(json.loads(result.text), indent=2) except Exception as e: - return f"Failed to parse document: {str(e)}" + return f"Failed to parse document: {e!s}" diff --git a/packages/tools/src/crewai_tools/tools/contextualai_query_tool/contextual_query_tool.py b/packages/tools/src/crewai_tools/tools/contextualai_query_tool/contextual_query_tool.py index 955ba6a39..13fd04299 100644 --- a/packages/tools/src/crewai_tools/tools/contextualai_query_tool/contextual_query_tool.py +++ b/packages/tools/src/crewai_tools/tools/contextualai_query_tool/contextual_query_tool.py @@ -1,33 +1,39 @@ -from typing import Any, Optional, Type, List +import asyncio +from typing import Any + +import requests from crewai.tools import BaseTool from pydantic import BaseModel, Field -import asyncio -import requests -import os class ContextualAIQuerySchema(BaseModel): """Schema for contextual query tool.""" + query: str = Field(..., description="Query to send to the Contextual AI agent.") agent_id: str = Field(..., description="ID of the Contextual AI agent to query") - datastore_id: Optional[str] = Field(None, description="Optional datastore ID for document readiness verification") + datastore_id: str | None = Field( + None, description="Optional datastore ID for document readiness verification" + ) class ContextualAIQueryTool(BaseTool): """Tool to query Contextual AI RAG agents.""" - + name: str = "Contextual AI Query Tool" - description: str = "Use this tool to query a Contextual AI RAG agent with access to your documents" - args_schema: Type[BaseModel] = ContextualAIQuerySchema - + description: str = ( + "Use this tool to query a Contextual AI RAG agent with access to your documents" + ) + args_schema: type[BaseModel] = ContextualAIQuerySchema + api_key: str contextual_client: Any = None - package_dependencies: List[str] = ["contextual-client"] + package_dependencies: list[str] = ["contextual-client"] def __init__(self, **kwargs): super().__init__(**kwargs) try: from contextual import ContextualAI + self.contextual_client = ContextualAI(api_key=self.api_key) except ImportError: raise ImportError( @@ -41,13 +47,17 @@ class ContextualAIQueryTool(BaseTool): response = requests.get(url, headers=headers) if response.status_code == 200: data = response.json() - documents = data.get('documents', []) - return not any(doc.get('status') in ('processing', 'pending') for doc in documents) + documents = data.get("documents", []) + return not any( + doc.get("status") in ("processing", "pending") for doc in documents + ) return True - async def _wait_for_documents_async(self, datastore_id: str, max_attempts: int = 20, interval: float = 30.0) -> bool: + async def _wait_for_documents_async( + self, datastore_id: str, max_attempts: int = 20, interval: float = 30.0 + ) -> bool: """Asynchronously poll until documents are ready, exiting early if possible.""" - for attempt in range(max_attempts): + for _attempt in range(max_attempts): ready = await asyncio.to_thread(self._check_documents_ready, datastore_id) if ready: return True @@ -55,10 +65,10 @@ class ContextualAIQueryTool(BaseTool): print("Processing documents ...") return True # give up but don't fail hard - def _run(self, query: str, agent_id: str, datastore_id: Optional[str] = None) -> str: + def _run(self, query: str, agent_id: str, datastore_id: str | None = None) -> str: if not 
agent_id: raise ValueError("Agent ID is required to query the Contextual AI agent") - + if datastore_id: ready = self._check_documents_ready(datastore_id) if not ready: @@ -69,31 +79,42 @@ class ContextualAIQueryTool(BaseTool): loop = None if loop and loop.is_running(): - # Already inside an event loop + # Already inside an event loop try: import nest_asyncio + nest_asyncio.apply(loop) - loop.run_until_complete(self._wait_for_documents_async(datastore_id)) + loop.run_until_complete( + self._wait_for_documents_async(datastore_id) + ) except Exception as e: - print(f"Failed to apply nest_asyncio: {str(e)}") + print(f"Failed to apply nest_asyncio: {e!s}") else: asyncio.run(self._wait_for_documents_async(datastore_id)) else: - print("Warning: No datastore_id provided. Document status checking disabled.") + print( + "Warning: No datastore_id provided. Document status checking disabled." + ) try: response = self.contextual_client.agents.query.create( - agent_id=agent_id, - messages=[{"role": "user", "content": query}] + agent_id=agent_id, messages=[{"role": "user", "content": query}] ) - if hasattr(response, 'content'): + if hasattr(response, "content"): return response.content - elif hasattr(response, 'message'): - return response.message.content if hasattr(response.message, 'content') else str(response.message) - elif hasattr(response, 'messages') and len(response.messages) > 0: + if hasattr(response, "message"): + return ( + response.message.content + if hasattr(response.message, "content") + else str(response.message) + ) + if hasattr(response, "messages") and len(response.messages) > 0: last_message = response.messages[-1] - return last_message.content if hasattr(last_message, 'content') else str(last_message) - else: - return str(response) + return ( + last_message.content + if hasattr(last_message, "content") + else str(last_message) + ) + return str(response) except Exception as e: - return f"Error querying Contextual AI agent: {str(e)}" + return f"Error querying Contextual AI agent: {e!s}" diff --git a/packages/tools/src/crewai_tools/tools/contextualai_rerank_tool/contextual_rerank_tool.py b/packages/tools/src/crewai_tools/tools/contextualai_rerank_tool/contextual_rerank_tool.py index c0bcab8a2..2dfa540b6 100644 --- a/packages/tools/src/crewai_tools/tools/contextualai_rerank_tool/contextual_rerank_tool.py +++ b/packages/tools/src/crewai_tools/tools/contextualai_rerank_tool/contextual_rerank_tool.py @@ -1,68 +1,79 @@ -from typing import Any, Optional, Type, List +from typing import ClassVar + from crewai.tools import BaseTool from pydantic import BaseModel, Field class ContextualAIRerankSchema(BaseModel): """Schema for contextual rerank tool.""" + query: str = Field(..., description="The search query to rerank documents against") - documents: List[str] = Field(..., description="List of document texts to rerank") - instruction: Optional[str] = Field(default=None, description="Optional instruction for reranking behavior") - metadata: Optional[List[str]] = Field(default=None, description="Optional metadata for each document") - model: str = Field(default="ctxl-rerank-en-v1-instruct", description="Reranker model to use") + documents: list[str] = Field(..., description="List of document texts to rerank") + instruction: str | None = Field( + default=None, description="Optional instruction for reranking behavior" + ) + metadata: list[str] | None = Field( + default=None, description="Optional metadata for each document" + ) + model: str = Field( + default="ctxl-rerank-en-v1-instruct", 
description="Reranker model to use" + ) class ContextualAIRerankTool(BaseTool): """Tool to rerank documents using Contextual AI's instruction-following reranker.""" - + name: str = "Contextual AI Document Reranker" - description: str = "Rerank documents using Contextual AI's instruction-following reranker" - args_schema: Type[BaseModel] = ContextualAIRerankSchema - + description: str = ( + "Rerank documents using Contextual AI's instruction-following reranker" + ) + args_schema: type[BaseModel] = ContextualAIRerankSchema + api_key: str - package_dependencies: List[str] = ["contextual-client"] + package_dependencies: ClassVar[list[str]] = ["contextual-client"] def _run( self, query: str, - documents: List[str], - instruction: Optional[str] = None, - metadata: Optional[List[str]] = None, - model: str = "ctxl-rerank-en-v1-instruct" + documents: list[str], + instruction: str | None = None, + metadata: list[str] | None = None, + model: str = "ctxl-rerank-en-v1-instruct", ) -> str: """Rerank documents using Contextual AI's instruction-following reranker.""" try: - import requests import json + import requests + base_url = "https://api.contextual.ai/v1" headers = { "accept": "application/json", "content-type": "application/json", - "authorization": f"Bearer {self.api_key}" + "authorization": f"Bearer {self.api_key}", } - payload = { - "query": query, - "documents": documents, - "model": model - } + payload = {"query": query, "documents": documents, "model": model} if instruction: payload["instruction"] = instruction if metadata: if len(metadata) != len(documents): - raise ValueError("Metadata list must have the same length as documents list") + raise ValueError( + "Metadata list must have the same length as documents list" + ) payload["metadata"] = metadata rerank_url = f"{base_url}/rerank" - result = requests.post(rerank_url, json=payload, headers=headers) + result = requests.post(rerank_url, json=payload, headers=headers, timeout=30) if result.status_code != 200: - raise RuntimeError(f"Reranker API returned status {result.status_code}: {result.text}") + raise RuntimeError( + f"Reranker API returned status {result.status_code}: {result.text}" + ) return json.dumps(result.json(), indent=2) except Exception as e: - return f"Failed to rerank documents: {str(e)}" + return f"Failed to rerank documents: {e!s}" diff --git a/packages/tools/src/crewai_tools/tools/couchbase_tool/couchbase_tool.py b/packages/tools/src/crewai_tools/tools/couchbase_tool/couchbase_tool.py index 3017f694f..6554ef483 100644 --- a/packages/tools/src/crewai_tools/tools/couchbase_tool/couchbase_tool.py +++ b/packages/tools/src/crewai_tools/tools/couchbase_tool/couchbase_tool.py @@ -1,6 +1,6 @@ import json -import os -from typing import Any, Optional, Type, List, Dict, Callable +from collections.abc import Callable +from typing import Any try: import couchbase.search as search @@ -29,30 +29,33 @@ class CouchbaseToolSchema(BaseModel): description="The query to search retrieve relevant information from the Couchbase database. Pass only the query, not the question.", ) + class CouchbaseFTSVectorSearchTool(BaseTool): """Tool to search the Couchbase database""" model_config = {"arbitrary_types_allowed": True} name: str = "CouchbaseFTSVectorSearchTool" description: str = "A tool to search the Couchbase database for relevant information on internal documents." 
- args_schema: Type[BaseModel] = CouchbaseToolSchema - cluster: SkipValidation[Optional[Cluster]] = None - collection_name: Optional[str] = None, - scope_name: Optional[str] = None, - bucket_name: Optional[str] = None, - index_name: Optional[str] = None, - embedding_key: Optional[str] = Field( + args_schema: type[BaseModel] = CouchbaseToolSchema + cluster: SkipValidation[Cluster | None] = None + collection_name: str | None = (None,) + scope_name: str | None = (None,) + bucket_name: str | None = (None,) + index_name: str | None = (None,) + embedding_key: str | None = Field( default="embedding", - description="Name of the field in the search index that stores the vector" + description="Name of the field in the search index that stores the vector", ) - scoped_index: Optional[bool] = Field( - default=True, - description="Specify whether the index is scoped. Is True by default." - ), - limit: Optional[int] = Field(default=3) - embedding_function: SkipValidation[Callable[[str], List[float]]] = Field( + scoped_index: bool | None = ( + Field( + default=True, + description="Specify whether the index is scoped. Is True by default.", + ), + ) + limit: int | None = Field(default=3) + embedding_function: SkipValidation[Callable[[str], list[float]]] = Field( default=None, - description="A function that takes a string and returns a list of floats. This is used to embed the query before searching the database." + description="A function that takes a string and returns a list of floats. This is used to embed the query before searching the database.", ) def _check_bucket_exists(self) -> bool: @@ -67,7 +70,7 @@ class CouchbaseFTSVectorSearchTool(BaseTool): def _check_scope_and_collection_exists(self) -> bool: """Check if the scope and collection exists in the linked Couchbase bucket Raises a ValueError if either is not found""" - scope_collection_map: Dict[str, Any] = {} + scope_collection_map: dict[str, Any] = {} # Get a list of all scopes in the bucket for scope in self._bucket.collections().get_all_scopes(): @@ -203,11 +206,7 @@ class CouchbaseFTSVectorSearchTool(BaseTool): search_req = search.SearchRequest.create( VectorSearch.from_vector_query( - VectorQuery( - self.embedding_key, - query_embedding, - self.limit - ) + VectorQuery(self.embedding_key, query_embedding, self.limit) ) ) @@ -219,16 +218,13 @@ class CouchbaseFTSVectorSearchTool(BaseTool): SearchOptions( limit=self.limit, fields=fields, - ) + ), ) else: search_iter = self.cluster.search( self.index_name, search_req, - SearchOptions( - limit=self.limit, - fields=fields - ) + SearchOptions(limit=self.limit, fields=fields), ) json_response = [] @@ -238,4 +234,4 @@ class CouchbaseFTSVectorSearchTool(BaseTool): except Exception as e: return f"Search failed with error: {e}" - return json.dumps(json_response, indent=2) \ No newline at end of file + return json.dumps(json_response, indent=2) diff --git a/packages/tools/src/crewai_tools/tools/crewai_enterprise_tools/crewai_enterprise_tools.py b/packages/tools/src/crewai_tools/tools/crewai_enterprise_tools/crewai_enterprise_tools.py index f5ac47643..63c55c310 100644 --- a/packages/tools/src/crewai_tools/tools/crewai_enterprise_tools/crewai_enterprise_tools.py +++ b/packages/tools/src/crewai_tools/tools/crewai_enterprise_tools/crewai_enterprise_tools.py @@ -2,10 +2,10 @@ Crewai Enterprise Tools """ -import os -import typing as t -import logging import json +import logging +import os + from crewai.tools import BaseTool from crewai_tools.adapters.enterprise_adapter import EnterpriseActionKitToolAdapter from 
crewai_tools.adapters.tool_collection import ToolCollection @@ -13,11 +13,11 @@ from crewai_tools.adapters.tool_collection import ToolCollection logger = logging.getLogger(__name__) -def CrewaiEnterpriseTools( - enterprise_token: t.Optional[str] = None, - actions_list: t.Optional[t.List[str]] = None, - enterprise_action_kit_project_id: t.Optional[str] = None, - enterprise_action_kit_project_url: t.Optional[str] = None, +def CrewaiEnterpriseTools( # noqa: N802 + enterprise_token: str | None = None, + actions_list: list[str] | None = None, + enterprise_action_kit_project_id: str | None = None, + enterprise_action_kit_project_url: str | None = None, ) -> ToolCollection[BaseTool]: """Factory function that returns crewai enterprise tools. @@ -34,10 +34,11 @@ def CrewaiEnterpriseTools( """ import warnings + warnings.warn( "CrewaiEnterpriseTools will be removed in v1.0.0. Considering use `Agent(apps=[...])` instead.", DeprecationWarning, - stacklevel=2 + stacklevel=2, ) if enterprise_token is None or enterprise_token == "": @@ -65,7 +66,7 @@ def CrewaiEnterpriseTools( # ENTERPRISE INJECTION ONLY -def _parse_actions_list(actions_list: t.Optional[t.List[str]]) -> t.List[str] | None: +def _parse_actions_list(actions_list: list[str] | None) -> list[str] | None: """Parse a string representation of a list of tool names to a list of tool names. Args: diff --git a/packages/tools/src/crewai_tools/tools/crewai_platform_tools/__init__.py b/packages/tools/src/crewai_tools/tools/crewai_platform_tools/__init__.py index 55db598c5..5938eba3e 100644 --- a/packages/tools/src/crewai_tools/tools/crewai_platform_tools/__init__.py +++ b/packages/tools/src/crewai_tools/tools/crewai_platform_tools/__init__.py @@ -4,13 +4,18 @@ This module provides tools for integrating with various platform applications through the CrewAI platform API. 
""" -from crewai_tools.tools.crewai_platform_tools.crewai_platform_tools import CrewaiPlatformTools -from crewai_tools.tools.crewai_platform_tools.crewai_platform_action_tool import CrewAIPlatformActionTool -from crewai_tools.tools.crewai_platform_tools.crewai_platform_tool_builder import CrewaiPlatformToolBuilder - +from crewai_tools.tools.crewai_platform_tools.crewai_platform_action_tool import ( + CrewAIPlatformActionTool, +) +from crewai_tools.tools.crewai_platform_tools.crewai_platform_tool_builder import ( + CrewaiPlatformToolBuilder, +) +from crewai_tools.tools.crewai_platform_tools.crewai_platform_tools import ( + CrewaiPlatformTools, +) __all__ = [ - "CrewaiPlatformTools", "CrewAIPlatformActionTool", "CrewaiPlatformToolBuilder", + "CrewaiPlatformTools", ] diff --git a/packages/tools/src/crewai_tools/tools/crewai_platform_tools/crewai_platform_action_tool.py b/packages/tools/src/crewai_tools/tools/crewai_platform_tools/crewai_platform_action_tool.py index 8df877408..1c1af2d69 100644 --- a/packages/tools/src/crewai_tools/tools/crewai_platform_tools/crewai_platform_action_tool.py +++ b/packages/tools/src/crewai_tools/tools/crewai_platform_tools/crewai_platform_action_tool.py @@ -1,18 +1,24 @@ """ Crewai Enterprise Tools """ -import re + import json +import re +from typing import Any, Literal, Optional, Union, cast, get_origin + import requests -from typing import Dict, Any, List, Type, Optional, Union, get_origin, cast, Literal -from pydantic import Field, create_model from crewai.tools import BaseTool -from crewai_tools.tools.crewai_platform_tools.misc import get_platform_api_base_url, get_platform_integration_token +from pydantic import Field, create_model + +from crewai_tools.tools.crewai_platform_tools.misc import ( + get_platform_api_base_url, + get_platform_integration_token, +) class CrewAIPlatformActionTool(BaseTool): action_name: str = Field(default="", description="The name of the action") - action_schema: Dict[str, Any] = Field( + action_schema: dict[str, Any] = Field( default_factory=dict, description="The schema of the action" ) @@ -20,7 +26,7 @@ class CrewAIPlatformActionTool(BaseTool): self, description: str, action_name: str, - action_schema: Dict[str, Any], + action_schema: dict[str, Any], ): self._model_registry = {} self._base_name = self._sanitize_name(action_name) @@ -36,7 +42,7 @@ class CrewAIPlatformActionTool(BaseTool): field_type = self._process_schema_type( param_details, self._sanitize_name(param_name).title() ) - except Exception as e: + except Exception: field_type = str field_definitions[param_name] = self._create_field_definition( @@ -60,7 +66,11 @@ class CrewAIPlatformActionTool(BaseTool): input_text=(str, Field(description="Input for the action")), ) - super().__init__(name=action_name.lower().replace(" ", "_"), description=description, args_schema=args_schema) + super().__init__( + name=action_name.lower().replace(" ", "_"), + description=description, + args_schema=args_schema, + ) self.action_name = action_name self.action_schema = action_schema @@ -71,8 +81,8 @@ class CrewAIPlatformActionTool(BaseTool): return "".join(word.capitalize() for word in parts if word) def _extract_schema_info( - self, action_schema: Dict[str, Any] - ) -> tuple[Dict[str, Any], List[str]]: + self, action_schema: dict[str, Any] + ) -> tuple[dict[str, Any], list[str]]: schema_props = ( action_schema.get("function", {}) .get("parameters", {}) @@ -83,7 +93,7 @@ class CrewAIPlatformActionTool(BaseTool): ) return schema_props, required - def _process_schema_type(self, schema: 
Dict[str, Any], type_name: str) -> Type[Any]: + def _process_schema_type(self, schema: dict[str, Any], type_name: str) -> type[Any]: if "anyOf" in schema: any_of_types = schema["anyOf"] is_nullable = any(t.get("type") == "null" for t in any_of_types) @@ -92,7 +102,7 @@ class CrewAIPlatformActionTool(BaseTool): if non_null_types: base_type = self._process_schema_type(non_null_types[0], type_name) return Optional[base_type] if is_nullable else base_type - return cast(Type[Any], Optional[str]) + return cast(type[Any], Optional[str]) if "oneOf" in schema: return self._process_schema_type(schema["oneOf"][0], type_name) @@ -111,14 +121,16 @@ class CrewAIPlatformActionTool(BaseTool): if json_type == "array": items_schema = schema.get("items", {"type": "string"}) item_type = self._process_schema_type(items_schema, f"{type_name}Item") - return List[item_type] + return list[item_type] if json_type == "object": return self._create_nested_model(schema, type_name) return self._map_json_type_to_python(json_type) - def _create_nested_model(self, schema: Dict[str, Any], model_name: str) -> Type[Any]: + def _create_nested_model( + self, schema: dict[str, Any], model_name: str + ) -> type[Any]: full_model_name = f"{self._base_name}{model_name}" if full_model_name in self._model_registry: @@ -139,7 +151,7 @@ class CrewAIPlatformActionTool(BaseTool): prop_type = self._process_schema_type( prop_schema, f"{model_name}{self._sanitize_name(prop_name).title()}" ) - except Exception as e: + except Exception: prop_type = str field_definitions[prop_name] = self._create_field_definition( @@ -155,20 +167,18 @@ class CrewAIPlatformActionTool(BaseTool): return dict def _create_field_definition( - self, field_type: Type[Any], is_required: bool, description: str + self, field_type: type[Any], is_required: bool, description: str ) -> tuple: if is_required: return (field_type, Field(description=description)) - else: - if get_origin(field_type) is Union: - return (field_type, Field(default=None, description=description)) - else: - return ( - Optional[field_type], - Field(default=None, description=description), - ) + if get_origin(field_type) is Union: + return (field_type, Field(default=None, description=description)) + return ( + Optional[field_type], + Field(default=None, description=description), + ) - def _map_json_type_to_python(self, json_type: str) -> Type[Any]: + def _map_json_type_to_python(self, json_type: str) -> type[Any]: type_mapping = { "string": str, "integer": int, @@ -180,7 +190,7 @@ class CrewAIPlatformActionTool(BaseTool): } return type_mapping.get(json_type, str) - def _get_required_nullable_fields(self) -> List[str]: + def _get_required_nullable_fields(self) -> list[str]: schema_props, required = self._extract_schema_info(self.action_schema) required_nullable_fields = [] @@ -191,7 +201,7 @@ class CrewAIPlatformActionTool(BaseTool): return required_nullable_fields - def _is_nullable_type(self, schema: Dict[str, Any]) -> bool: + def _is_nullable_type(self, schema: dict[str, Any]) -> bool: if "anyOf" in schema: return any(t.get("type") == "null" for t in schema["anyOf"]) return schema.get("type") == "null" @@ -209,8 +219,9 @@ class CrewAIPlatformActionTool(BaseTool): if field_name not in cleaned_kwargs: cleaned_kwargs[field_name] = None - - api_url = f"{get_platform_api_base_url()}/actions/{self.action_name}/execute" + api_url = ( + f"{get_platform_api_base_url()}/actions/{self.action_name}/execute" + ) token = get_platform_integration_token() headers = { "Authorization": f"Bearer {token}", @@ -230,4 +241,4 
@@ class CrewAIPlatformActionTool(BaseTool): return json.dumps(data, indent=2) except Exception as e: - return f"Error executing action {self.action_name}: {str(e)}" + return f"Error executing action {self.action_name}: {e!s}" diff --git a/packages/tools/src/crewai_tools/tools/crewai_platform_tools/crewai_platform_tool_builder.py b/packages/tools/src/crewai_tools/tools/crewai_platform_tools/crewai_platform_tool_builder.py index 9a8feb94c..cc3d3ef3f 100644 --- a/packages/tools/src/crewai_tools/tools/crewai_platform_tools/crewai_platform_tool_builder.py +++ b/packages/tools/src/crewai_tools/tools/crewai_platform_tools/crewai_platform_tool_builder.py @@ -1,9 +1,15 @@ +from typing import Any import requests -from typing import List, Any, Dict from crewai.tools import BaseTool -from crewai_tools.tools.crewai_platform_tools.misc import get_platform_api_base_url, get_platform_integration_token -from crewai_tools.tools.crewai_platform_tools.crewai_platform_action_tool import CrewAIPlatformActionTool + +from crewai_tools.tools.crewai_platform_tools.crewai_platform_action_tool import ( + CrewAIPlatformActionTool, +) +from crewai_tools.tools.crewai_platform_tools.misc import ( + get_platform_api_base_url, + get_platform_integration_token, +) class CrewaiPlatformToolBuilder: @@ -27,13 +33,15 @@ class CrewaiPlatformToolBuilder: try: response = requests.get( - actions_url, headers=headers, timeout=30, params={"apps": ",".join(self._apps)} + actions_url, + headers=headers, + timeout=30, + params={"apps": ",".join(self._apps)}, ) response.raise_for_status() - except Exception as e: + except Exception: return - raw_data = response.json() self._actions_schema = {} @@ -46,7 +54,9 @@ class CrewaiPlatformToolBuilder: action_schema = { "function": { "name": action_name, - "description": action.get("description", f"Execute {action_name}"), + "description": action.get( + "description", f"Execute {action_name}" + ), "parameters": action.get("parameters", {}), "app": app, } @@ -54,8 +64,8 @@ class CrewaiPlatformToolBuilder: self._actions_schema[action_name] = action_schema def _generate_detailed_description( - self, schema: Dict[str, Any], indent: int = 0 - ) -> List[str]: + self, schema: dict[str, Any], indent: int = 0 + ) -> list[str]: descriptions = [] indent_str = " " * indent @@ -127,7 +137,6 @@ class CrewaiPlatformToolBuilder: self._tools = tools - def __enter__(self): return self.tools() diff --git a/packages/tools/src/crewai_tools/tools/crewai_platform_tools/crewai_platform_tools.py b/packages/tools/src/crewai_tools/tools/crewai_platform_tools/crewai_platform_tools.py index 8bfa1073a..757154ac6 100644 --- a/packages/tools/src/crewai_tools/tools/crewai_platform_tools/crewai_platform_tools.py +++ b/packages/tools/src/crewai_tools/tools/crewai_platform_tools/crewai_platform_tools.py @@ -1,18 +1,16 @@ -import re -import os -import typing as t -from typing import Literal import logging -import json + from crewai.tools import BaseTool -from crewai_tools.tools.crewai_platform_tools.crewai_platform_tool_builder import CrewaiPlatformToolBuilder + from crewai_tools.adapters.tool_collection import ToolCollection +from crewai_tools.tools.crewai_platform_tools.crewai_platform_tool_builder import ( + CrewaiPlatformToolBuilder, +) logger = logging.getLogger(__name__) - -def CrewaiPlatformTools( +def CrewaiPlatformTools( # noqa: N802 apps: list[str], ) -> ToolCollection[BaseTool]: """Factory function that returns crewai platform tools. 
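For orientation before the misc.py helpers below, a minimal usage sketch of the refactored platform tools factory. This is not part of the patch: it assumes ToolCollection iterates like a list, and the "gmail" app name and token value are placeholders rather than anything taken from this diff.

import os

from crewai_tools.tools.crewai_platform_tools import CrewaiPlatformTools

# get_platform_integration_token() raises ValueError when this variable is
# missing, so export a token before building the collection.
os.environ.setdefault("CREWAI_PLATFORM_INTEGRATION_TOKEN", "<your-token>")

# Fetch action schemas for the requested apps and wrap each one as a BaseTool.
tools = CrewaiPlatformTools(apps=["gmail"])  # "gmail" is an illustrative app name

for tool in tools:
    print(tool.name, "-", tool.description)
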
diff --git a/packages/tools/src/crewai_tools/tools/crewai_platform_tools/misc.py b/packages/tools/src/crewai_tools/tools/crewai_platform_tools/misc.py index 0839719d7..06cf7147d 100644 --- a/packages/tools/src/crewai_tools/tools/crewai_platform_tools/misc.py +++ b/packages/tools/src/crewai_tools/tools/crewai_platform_tools/misc.py @@ -1,13 +1,17 @@ import os + def get_platform_api_base_url() -> str: """Get the platform API base URL from environment or use default.""" base_url = os.getenv("CREWAI_PLUS_URL", "https://app.crewai.com") return f"{base_url}/crewai_plus/api/v1/integrations" + def get_platform_integration_token() -> str: """Get the platform API base URL from environment or use default.""" token = os.getenv("CREWAI_PLATFORM_INTEGRATION_TOKEN") or "" if not token: - raise ValueError("No platform integration token found, please set the CREWAI_PLATFORM_INTEGRATION_TOKEN environment variable") - return token # TODO: Use context manager to get token + raise ValueError( + "No platform integration token found, please set the CREWAI_PLATFORM_INTEGRATION_TOKEN environment variable" + ) + return token # TODO: Use context manager to get token diff --git a/packages/tools/src/crewai_tools/tools/csv_search_tool/csv_search_tool.py b/packages/tools/src/crewai_tools/tools/csv_search_tool/csv_search_tool.py index 4be84efdd..bb9ba6dfa 100644 --- a/packages/tools/src/crewai_tools/tools/csv_search_tool/csv_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/csv_search_tool/csv_search_tool.py @@ -1,11 +1,4 @@ -from typing import Optional, Type - -try: - from embedchain.models.data_type import DataType - EMBEDCHAIN_AVAILABLE = True -except ImportError: - EMBEDCHAIN_AVAILABLE = False - +from crewai_tools.rag.data_types import DataType from pydantic import BaseModel, Field from ..rag.rag_tool import RagTool @@ -31,9 +24,9 @@ class CSVSearchTool(RagTool): description: str = ( "A tool that can be used to semantic search a query from a CSV's content." ) - args_schema: Type[BaseModel] = CSVSearchToolSchema + args_schema: type[BaseModel] = CSVSearchToolSchema - def __init__(self, csv: Optional[str] = None, **kwargs): + def __init__(self, csv: str | None = None, **kwargs): super().__init__(**kwargs) if csv is not None: self.add(csv) @@ -42,15 +35,17 @@ class CSVSearchTool(RagTool): self._generate_description() def add(self, csv: str) -> None: - if not EMBEDCHAIN_AVAILABLE: - raise ImportError("embedchain is not installed. 
Please install it with `pip install crewai-tools[embedchain]`") super().add(csv, data_type=DataType.CSV) def _run( self, search_query: str, - csv: Optional[str] = None, + csv: str | None = None, + similarity_threshold: float | None = None, + limit: int | None = None, ) -> str: if csv is not None: self.add(csv) - return super()._run(query=search_query) + return super()._run( + query=search_query, similarity_threshold=similarity_threshold, limit=limit + ) diff --git a/packages/tools/src/crewai_tools/tools/dalle_tool/dalle_tool.py b/packages/tools/src/crewai_tools/tools/dalle_tool/dalle_tool.py index 6a4a9e84f..4d3ef3413 100644 --- a/packages/tools/src/crewai_tools/tools/dalle_tool/dalle_tool.py +++ b/packages/tools/src/crewai_tools/tools/dalle_tool/dalle_tool.py @@ -1,5 +1,4 @@ import json -from typing import List, Type from crewai.tools import BaseTool, EnvVar from openai import OpenAI @@ -9,21 +8,27 @@ from pydantic import BaseModel, Field class ImagePromptSchema(BaseModel): """Input for Dall-E Tool.""" - image_description: str = Field(description="Description of the image to be generated by Dall-E.") + image_description: str = Field( + description="Description of the image to be generated by Dall-E." + ) class DallETool(BaseTool): name: str = "Dall-E Tool" description: str = "Generates images using OpenAI's Dall-E model." - args_schema: Type[BaseModel] = ImagePromptSchema + args_schema: type[BaseModel] = ImagePromptSchema model: str = "dall-e-3" size: str = "1024x1024" quality: str = "standard" n: int = 1 - env_vars: List[EnvVar] = [ - EnvVar(name="OPENAI_API_KEY", description="API key for OpenAI services", required=True), + env_vars: list[EnvVar] = [ + EnvVar( + name="OPENAI_API_KEY", + description="API key for OpenAI services", + required=True, + ), ] def _run(self, **kwargs) -> str: @@ -42,11 +47,9 @@ class DallETool(BaseTool): n=self.n, ) - image_data = json.dumps( + return json.dumps( { "image_url": response.data[0].url, "image_description": response.data[0].revised_prompt, } ) - - return image_data diff --git a/packages/tools/src/crewai_tools/tools/databricks_query_tool/databricks_query_tool.py b/packages/tools/src/crewai_tools/tools/databricks_query_tool/databricks_query_tool.py index fe73179cb..348bf00b0 100644 --- a/packages/tools/src/crewai_tools/tools/databricks_query_tool/databricks_query_tool.py +++ b/packages/tools/src/crewai_tools/tools/databricks_query_tool/databricks_query_tool.py @@ -1,5 +1,5 @@ import os -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type, Union +from typing import TYPE_CHECKING, Any, Optional from crewai.tools import BaseTool from pydantic import BaseModel, Field, model_validator @@ -7,27 +7,31 @@ from pydantic import BaseModel, Field, model_validator if TYPE_CHECKING: from databricks.sdk import WorkspaceClient + class DatabricksQueryToolSchema(BaseModel): """Input schema for DatabricksQueryTool.""" query: str = Field( ..., description="SQL query to execute against the Databricks workspace table" ) - catalog: Optional[str] = Field( - None, description="Databricks catalog name (optional, defaults to configured catalog)" + catalog: str | None = Field( + None, + description="Databricks catalog name (optional, defaults to configured catalog)", ) - db_schema: Optional[str] = Field( - None, description="Databricks schema name (optional, defaults to configured schema)" + db_schema: str | None = Field( + None, + description="Databricks schema name (optional, defaults to configured schema)", ) - warehouse_id: Optional[str] = Field( - None, 
description="Databricks SQL warehouse ID (optional, defaults to configured warehouse)" + warehouse_id: str | None = Field( + None, + description="Databricks SQL warehouse ID (optional, defaults to configured warehouse)", ) - row_limit: Optional[int] = Field( + row_limit: int | None = Field( 1000, description="Maximum number of rows to return (default: 1000)" ) - @model_validator(mode='after') - def validate_input(self) -> 'DatabricksQueryToolSchema': + @model_validator(mode="after") + def validate_input(self) -> "DatabricksQueryToolSchema": """Validate the input parameters.""" # Ensure the query is not empty if not self.query or not self.query.strip(): @@ -61,21 +65,21 @@ class DatabricksQueryTool(BaseTool): "Execute SQL queries against Databricks workspace tables and return the results." " Provide a 'query' parameter with the SQL query to execute." ) - args_schema: Type[BaseModel] = DatabricksQueryToolSchema + args_schema: type[BaseModel] = DatabricksQueryToolSchema # Optional default parameters - default_catalog: Optional[str] = None - default_schema: Optional[str] = None - default_warehouse_id: Optional[str] = None + default_catalog: str | None = None + default_schema: str | None = None + default_warehouse_id: str | None = None _workspace_client: Optional["WorkspaceClient"] = None - package_dependencies: List[str] = ["databricks-sdk"] + package_dependencies: list[str] = ["databricks-sdk"] def __init__( self, - default_catalog: Optional[str] = None, - default_schema: Optional[str] = None, - default_warehouse_id: Optional[str] = None, + default_catalog: str | None = None, + default_schema: str | None = None, + default_warehouse_id: str | None = None, **kwargs: Any, ) -> None: """ @@ -96,7 +100,9 @@ class DatabricksQueryTool(BaseTool): def _validate_credentials(self) -> None: """Validate that Databricks credentials are available.""" has_profile = "DATABRICKS_CONFIG_PROFILE" in os.environ - has_direct_auth = "DATABRICKS_HOST" in os.environ and "DATABRICKS_TOKEN" in os.environ + has_direct_auth = ( + "DATABRICKS_HOST" in os.environ and "DATABRICKS_TOKEN" in os.environ + ) if not (has_profile or has_direct_auth): raise ValueError( @@ -110,6 +116,7 @@ class DatabricksQueryTool(BaseTool): if self._workspace_client is None: try: from databricks.sdk import WorkspaceClient + self._workspace_client = WorkspaceClient() except ImportError: raise ImportError( @@ -117,7 +124,7 @@ class DatabricksQueryTool(BaseTool): ) return self._workspace_client - def _format_results(self, results: List[Dict[str, Any]]) -> str: + def _format_results(self, results: list[dict[str, Any]]) -> str: """Format query results as a readable string.""" if not results: return "Query returned no results." 
@@ -149,8 +156,13 @@ class DatabricksQueryTool(BaseTool): data_rows = [] for row in results: # Handle None values by displaying "NULL" - row_values = {col: str(row[col]) if row[col] is not None else "NULL" for col in columns} - data_row = " | ".join(f"{row_values[col]:{col_widths[col]}}" for col in columns) + row_values = { + col: str(row[col]) if row[col] is not None else "NULL" + for col in columns + } + data_row = " | ".join( + f"{row_values[col]:{col_widths[col]}}" for col in columns + ) data_rows.append(data_row) # Add row count information @@ -190,7 +202,7 @@ class DatabricksQueryTool(BaseTool): catalog=catalog, db_schema=db_schema, warehouse_id=warehouse_id, - row_limit=row_limit + row_limit=row_limit, ) # Extract validated parameters @@ -212,18 +224,17 @@ class DatabricksQueryTool(BaseTool): try: # Execute the statement execution = statement.execute_statement( - warehouse_id=warehouse_id, - statement=query, - **context + warehouse_id=warehouse_id, statement=query, **context ) statement_id = execution.statement_id except Exception as execute_error: # Handle immediate execution errors - return f"Error starting query execution: {str(execute_error)}" + return f"Error starting query execution: {execute_error!s}" # Poll for results with better error handling import time + result = None timeout = 300 # 5 minutes timeout start_time = time.time() @@ -237,8 +248,10 @@ class DatabricksQueryTool(BaseTool): result = statement.get_statement(statement_id) # Check if finished - be very explicit about state checking - if hasattr(result, 'status') and hasattr(result.status, 'state'): - state_value = str(result.status.state) # Convert to string to handle both string and enum + if hasattr(result, "status") and hasattr(result.status, "state"): + state_value = str( + result.status.state + ) # Convert to string to handle both string and enum # Track state changes for debugging if previous_state != state_value: @@ -247,33 +260,38 @@ class DatabricksQueryTool(BaseTool): # Check if state indicates completion if "SUCCEEDED" in state_value: break - elif "FAILED" in state_value: + if "FAILED" in state_value: # Extract error message with more robust handling error_info = "No detailed error info" try: # First try direct access to error.message - if hasattr(result.status, 'error') and result.status.error: - if hasattr(result.status.error, 'message'): + if ( + hasattr(result.status, "error") + and result.status.error + ): + if hasattr(result.status.error, "message"): error_info = result.status.error.message # Some APIs may have a different structure - elif hasattr(result.status.error, 'error_message'): + elif hasattr(result.status.error, "error_message"): error_info = result.status.error.error_message # Last resort, try to convert the whole error object to string else: error_info = str(result.status.error) except Exception as err_extract_error: # If all else fails, try to get any info we can - error_info = f"Error details unavailable: {str(err_extract_error)}" + error_info = ( + f"Error details unavailable: {err_extract_error!s}" + ) # Return immediately on first FAILED state detection return f"Query execution failed: {error_info}" - elif "CANCELED" in state_value: + if "CANCELED" in state_value: return "Query was canceled" except Exception as poll_error: # Don't immediately fail - try again a few times if poll_count > 3: - return f"Error checking query status: {str(poll_error)}" + return f"Error checking query status: {poll_error!s}" # Wait before polling again time.sleep(2) @@ -282,21 +300,27 @@ class 
DatabricksQueryTool(BaseTool): if result is None: return "Query returned no result (likely timed out or failed)" - if not hasattr(result, 'status') or not hasattr(result.status, 'state'): + if not hasattr(result, "status") or not hasattr(result.status, "state"): return "Query completed but returned an invalid result structure" # Convert state to string for comparison state_value = str(result.status.state) - if not any(state in state_value for state in ["SUCCEEDED", "FAILED", "CANCELED"]): + if not any( + state in state_value for state in ["SUCCEEDED", "FAILED", "CANCELED"] + ): return f"Query timed out after 5 minutes (last state: {state_value})" # Get results - adapt this based on the actual structure of the result object chunk_results = [] # Check if we have results and a schema in a very defensive way - has_schema = (hasattr(result, 'manifest') and result.manifest is not None and - hasattr(result.manifest, 'schema') and result.manifest.schema is not None) - has_result = (hasattr(result, 'result') and result.result is not None) + has_schema = ( + hasattr(result, "manifest") + and result.manifest is not None + and hasattr(result.manifest, "schema") + and result.manifest.schema is not None + ) + has_result = hasattr(result, "result") and result.result is not None if has_schema and has_result: try: @@ -309,10 +333,12 @@ class DatabricksQueryTool(BaseTool): all_columns = set(columns) # Dump the raw structure of result data to help troubleshoot - if hasattr(result.result, 'data_array'): + if hasattr(result.result, "data_array"): # Add defensive check for None data_array if result.result.data_array is None: - print("data_array is None - likely an empty result set or DDL query") + print( + "data_array is None - likely an empty result set or DDL query" + ) # Return empty result handling rather than trying to process null data return "Query executed successfully (no data returned)" @@ -321,7 +347,12 @@ class DatabricksQueryTool(BaseTool): is_likely_incorrect_row_structure = False # Only try to analyze sample if data_array exists and has content - if hasattr(result.result, 'data_array') and result.result.data_array and len(result.result.data_array) > 0 and len(result.result.data_array[0]) > 0: + if ( + hasattr(result.result, "data_array") + and result.result.data_array + and len(result.result.data_array) > 0 + and len(result.result.data_array[0]) > 0 + ): sample_size = min(20, len(result.result.data_array[0])) if sample_size > 0: @@ -332,40 +363,81 @@ class DatabricksQueryTool(BaseTool): for i in range(sample_size): val = result.result.data_array[0][i] total_items += 1 - if isinstance(val, str) and len(val) == 1 and not val.isdigit(): + if ( + isinstance(val, str) + and len(val) == 1 + and not val.isdigit() + ): single_char_count += 1 - elif isinstance(val, str) and len(val) == 1 and val.isdigit(): + elif ( + isinstance(val, str) + and len(val) == 1 + and val.isdigit() + ): single_digit_count += 1 # If a significant portion of the first values are single characters or digits, # this likely indicates data is being incorrectly structured - if total_items > 0 and (single_char_count + single_digit_count) / total_items > 0.5: + if ( + total_items > 0 + and (single_char_count + single_digit_count) + / total_items + > 0.5 + ): is_likely_incorrect_row_structure = True # Additional check: if many rows have just 1 item when we expect multiple columns rows_with_single_item = 0 - if hasattr(result.result, 'data_array') and result.result.data_array and len(result.result.data_array) > 0: - sample_size_for_rows 
= min(sample_size, len(result.result.data_array[0])) if 'sample_size' in locals() else min(20, len(result.result.data_array[0])) - rows_with_single_item = sum(1 for row in result.result.data_array[0][:sample_size_for_rows] if isinstance(row, list) and len(row) == 1) - if rows_with_single_item > sample_size_for_rows * 0.5 and len(columns) > 1: + if ( + hasattr(result.result, "data_array") + and result.result.data_array + and len(result.result.data_array) > 0 + ): + sample_size_for_rows = ( + min(sample_size, len(result.result.data_array[0])) + if "sample_size" in locals() + else min(20, len(result.result.data_array[0])) + ) + rows_with_single_item = sum( + 1 + for row in result.result.data_array[0][ + :sample_size_for_rows + ] + if isinstance(row, list) and len(row) == 1 + ) + if ( + rows_with_single_item > sample_size_for_rows * 0.5 + and len(columns) > 1 + ): is_likely_incorrect_row_structure = True # Check if we're getting primarily single characters or the data structure seems off, # we should use special handling - if 'is_likely_incorrect_row_structure' in locals() and is_likely_incorrect_row_structure: - print("Data appears to be malformed - will use special row reconstruction") + if ( + "is_likely_incorrect_row_structure" in locals() + and is_likely_incorrect_row_structure + ): + print( + "Data appears to be malformed - will use special row reconstruction" + ) needs_special_string_handling = True else: needs_special_string_handling = False # Process results differently based on detection - if 'needs_special_string_handling' in locals() and needs_special_string_handling: + if ( + "needs_special_string_handling" in locals() + and needs_special_string_handling + ): # We're dealing with data where the rows may be incorrectly structured print("Using row reconstruction processing mode") # Collect all values into a flat list all_values = [] - if hasattr(result.result, 'data_array') and result.result.data_array: + if ( + hasattr(result.result, "data_array") + and result.result.data_array + ): # Flatten all values into a single list for chunk in result.result.data_array: for item in chunk: @@ -386,32 +458,43 @@ class DatabricksQueryTool(BaseTool): # Use regex pattern to identify ID columns that likely start a new row import re - id_pattern = re.compile(r'^\d{5,9}$') # Netflix IDs are often 5-9 digits + + id_pattern = re.compile( + r"^\d{5,9}$" + ) # Netflix IDs are often 5-9 digits id_indices = [] for i, val in enumerate(all_values): if isinstance(val, str) and id_pattern.match(val): # This value looks like an ID, might be the start of a row if i < len(all_values) - 1: - next_few_values = all_values[i+1:i+5] + next_few_values = all_values[i + 1 : i + 5] # If following values look like they could be part of a title - if any(isinstance(v, str) and len(v) > 1 for v in next_few_values): + if any( + isinstance(v, str) and len(v) > 1 + for v in next_few_values + ): id_indices.append(i) if id_indices: - # If we found potential row starts, use them to extract rows for i in range(len(id_indices)): start_idx = id_indices[i] - end_idx = id_indices[i+1] if i+1 < len(id_indices) else len(all_values) + end_idx = ( + id_indices[i + 1] + if i + 1 < len(id_indices) + else len(all_values) + ) # Extract values for this row row_values = all_values[start_idx:end_idx] # Special handling for Netflix title data # Titles might be split into individual characters - if 'Title' in columns and len(row_values) > expected_column_count: - + if ( + "Title" in columns + and len(row_values) > expected_column_count + ): # Try 
to reconstruct by looking for patterns # We know ID is first, then Title (which may be split) # Then other fields like Genre, etc. @@ -424,7 +507,14 @@ class DatabricksQueryTool(BaseTool): for j in range(2, min(100, len(row_values))): val = row_values[j] # Check for common genres or non-title markers - if isinstance(val, str) and val in ['Comedy', 'Drama', 'Action', 'Horror', 'Thriller', 'Documentary']: + if isinstance(val, str) and val in [ + "Comedy", + "Drama", + "Action", + "Horror", + "Thriller", + "Documentary", + ]: # Likely found the Genre field title_end_idx = j break @@ -433,15 +523,24 @@ class DatabricksQueryTool(BaseTool): if title_end_idx > 1: title_chars = row_values[1:title_end_idx] # Check if they're individual characters - if all(isinstance(c, str) and len(c) == 1 for c in title_chars): - title = ''.join(title_chars) - row_dict['Title'] = title + if all( + isinstance(c, str) and len(c) == 1 + for c in title_chars + ): + title = "".join(title_chars) + row_dict["Title"] = title # Assign remaining values to columns - remaining_values = row_values[title_end_idx:] - for j, col_name in enumerate(columns[2:], 2): - if j-2 < len(remaining_values): - row_dict[col_name] = remaining_values[j-2] + remaining_values = row_values[ + title_end_idx: + ] + for j, col_name in enumerate( + columns[2:], 2 + ): + if j - 2 < len(remaining_values): + row_dict[col_name] = ( + remaining_values[j - 2] + ) else: row_dict[col_name] = None else: @@ -463,7 +562,9 @@ class DatabricksQueryTool(BaseTool): reconstructed_rows.append(row_dict) else: # More intelligent chunking - try to detect where columns like Title might be split - title_idx = columns.index('Title') if 'Title' in columns else -1 + title_idx = ( + columns.index("Title") if "Title" in columns else -1 + ) if title_idx >= 0: print("Attempting title reconstruction method") @@ -471,21 +572,27 @@ class DatabricksQueryTool(BaseTool): i = 0 while i < len(all_values): # Check if this could be an ID (start of a row) - if isinstance(all_values[i], str) and id_pattern.match(all_values[i]): + if isinstance( + all_values[i], str + ) and id_pattern.match(all_values[i]): row_dict = {columns[0]: all_values[i]} i += 1 # Try to reconstruct title if it appears to be split title_chars = [] - while (i < len(all_values) and - isinstance(all_values[i], str) and - len(all_values[i]) <= 1 and - len(title_chars) < 100): # Cap title length + while ( + i < len(all_values) + and isinstance(all_values[i], str) + and len(all_values[i]) <= 1 + and len(title_chars) < 100 + ): # Cap title length title_chars.append(all_values[i]) i += 1 if title_chars: - row_dict[columns[title_idx]] = ''.join(title_chars) + row_dict[columns[title_idx]] = "".join( + title_chars + ) # Add remaining fields for j in range(title_idx + 1, len(columns)): @@ -502,11 +609,18 @@ class DatabricksQueryTool(BaseTool): # If we still don't have rows, use simple chunking as fallback if not reconstructed_rows: print("Falling back to basic chunking approach") - chunks = [all_values[i:i+expected_column_count] for i in range(0, len(all_values), expected_column_count)] + chunks = [ + all_values[i : i + expected_column_count] + for i in range( + 0, len(all_values), expected_column_count + ) + ] for chunk in chunks: # Skip chunks that seem to be partial/incomplete rows - if len(chunk) < expected_column_count * 0.75: # Allow for some missing values + if ( + len(chunk) < expected_column_count * 0.75 + ): # Allow for some missing values continue row_dict = {} @@ -521,13 +635,16 @@ class DatabricksQueryTool(BaseTool): 
reconstructed_rows.append(row_dict) # Apply post-processing to fix known issues - if reconstructed_rows and 'Title' in columns: + if reconstructed_rows and "Title" in columns: print("Applying post-processing to improve data quality") for row in reconstructed_rows: # Fix titles that might still have issues - if isinstance(row.get('Title'), str) and len(row.get('Title')) <= 1: + if ( + isinstance(row.get("Title"), str) + and len(row.get("Title")) <= 1 + ): # This is likely still a fragmented title - mark as potentially incomplete - row['Title'] = f"[INCOMPLETE] {row.get('Title')}" + row["Title"] = f"[INCOMPLETE] {row.get('Title')}" # Ensure we respect the row limit if row_limit and len(reconstructed_rows) > row_limit: @@ -539,28 +656,53 @@ class DatabricksQueryTool(BaseTool): print("Using standard processing mode") # Check different result structures - if hasattr(result.result, 'data_array') and result.result.data_array: + if ( + hasattr(result.result, "data_array") + and result.result.data_array + ): # Check if data appears to be malformed within chunks - for chunk_idx, chunk in enumerate(result.result.data_array): - + for _chunk_idx, chunk in enumerate( + result.result.data_array + ): # Check if chunk might actually contain individual columns of a single row # This is another way data might be malformed - check the first few values if len(chunk) > 0 and len(columns) > 1: # If there seems to be a mismatch between chunk structure and expected columns - first_few_values = chunk[:min(5, len(chunk))] - if all(isinstance(val, (str, int, float)) and not isinstance(val, (list, dict)) for val in first_few_values): - if len(chunk) > len(columns) * 3: # Heuristic: if chunk has way more items than columns - print("Chunk appears to contain individual values rather than rows - switching to row reconstruction") + first_few_values = chunk[: min(5, len(chunk))] + if all( + isinstance(val, (str, int, float)) + and not isinstance(val, (list, dict)) + for val in first_few_values + ): + if ( + len(chunk) > len(columns) * 3 + ): # Heuristic: if chunk has way more items than columns + print( + "Chunk appears to contain individual values rather than rows - switching to row reconstruction" + ) # This chunk might actually be values of multiple rows - try to reconstruct values = chunk # All values in this chunk reconstructed_rows = [] # Try to create rows based on expected column count - for i in range(0, len(values), len(columns)): - if i + len(columns) <= len(values): # Ensure we have enough values - row_values = values[i:i+len(columns)] - row_dict = {col: val for col, val in zip(columns, row_values)} + for i in range( + 0, len(values), len(columns) + ): + if i + len(columns) <= len( + values + ): # Ensure we have enough values + row_values = values[ + i : i + len(columns) + ] + row_dict = { + col: val + for col, val in zip( + columns, + row_values, + strict=False, + ) + } reconstructed_rows.append(row_dict) if reconstructed_rows: @@ -569,21 +711,36 @@ class DatabricksQueryTool(BaseTool): # Special case: when chunk contains exactly the right number of values for a single row # This handles the case where instead of a list of rows, we just got all values in a flat list - if all(isinstance(val, (str, int, float)) and not isinstance(val, (list, dict)) for val in chunk): - if len(chunk) == len(columns) or (len(chunk) > 0 and len(chunk) % len(columns) == 0): - + if all( + isinstance(val, (str, int, float)) + and not isinstance(val, (list, dict)) + for val in chunk + ): + if len(chunk) == len(columns) or ( + len(chunk) 
> 0 + and len(chunk) % len(columns) == 0 + ): # Process flat list of values as rows for i in range(0, len(chunk), len(columns)): - row_values = chunk[i:i+len(columns)] - if len(row_values) == len(columns): # Only process complete rows - row_dict = {col: val for col, val in zip(columns, row_values)} + row_values = chunk[i : i + len(columns)] + if len(row_values) == len( + columns + ): # Only process complete rows + row_dict = { + col: val + for col, val in zip( + columns, + row_values, + strict=False, + ) + } chunk_results.append(row_dict) # Skip regular row processing for this chunk continue # Normal processing for typical row structure - for row_idx, row in enumerate(chunk): + for _row_idx, row in enumerate(chunk): # Ensure row is actually a collection of values if not isinstance(row, (list, tuple, dict)): # This might be a single value; skip it or handle specially @@ -599,7 +756,9 @@ class DatabricksQueryTool(BaseTool): elif isinstance(row, (list, tuple)): # Map list of values to columns for i, val in enumerate(row): - if i < len(columns): # Only process if we have a matching column + if ( + i < len(columns) + ): # Only process if we have a matching column row_dict[columns[i]] = val else: # Extra values without column names @@ -614,16 +773,18 @@ class DatabricksQueryTool(BaseTool): chunk_results.append(row_dict) - elif hasattr(result.result, 'data') and result.result.data: + elif hasattr(result.result, "data") and result.result.data: # Alternative data structure - for row_idx, row in enumerate(result.result.data): + for _row_idx, row in enumerate(result.result.data): # Debug info # Safely create dictionary matching column names to values row_dict = {} for i, val in enumerate(row): - if i < len(columns): # Only process if we have a matching column + if i < len( + columns + ): # Only process if we have a matching column row_dict[columns[i]] = val else: # Extra values without column names @@ -642,7 +803,9 @@ class DatabricksQueryTool(BaseTool): normalized_results = [] for row in chunk_results: # Create a new row with all columns, defaulting to None for missing ones - normalized_row = {col: row.get(col, None) for col in all_columns} + normalized_row = { + col: row.get(col, None) for col in all_columns + } normalized_results.append(normalized_row) # Replace the original results with normalized ones @@ -651,11 +814,12 @@ class DatabricksQueryTool(BaseTool): except Exception as results_error: # Enhanced error message with more context import traceback + error_details = traceback.format_exc() - return f"Error processing query results: {str(results_error)}\n\nDetails:\n{error_details}" + return f"Error processing query results: {results_error!s}\n\nDetails:\n{error_details}" # If we have no results but the query succeeded (e.g., for DDL statements) - if not chunk_results and hasattr(result, 'status'): + if not chunk_results and hasattr(result, "status"): state_value = str(result.status.state) if "SUCCEEDED" in state_value: return "Query executed successfully (no results to display)" @@ -666,5 +830,8 @@ class DatabricksQueryTool(BaseTool): except Exception as e: # Include more details in the error message to help with debugging import traceback + error_details = traceback.format_exc() - return f"Error executing Databricks query: {str(e)}\n\nDetails:\n{error_details}" + return ( + f"Error executing Databricks query: {e!s}\n\nDetails:\n{error_details}" + ) diff --git a/packages/tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py 
b/packages/tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py index 8488f391e..d3f88c921 100644 --- a/packages/tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py +++ b/packages/tools/src/crewai_tools/tools/directory_read_tool/directory_read_tool.py @@ -1,5 +1,5 @@ import os -from typing import Any, Optional, Type +from typing import Any from crewai.tools import BaseTool from pydantic import BaseModel, Field @@ -20,10 +20,10 @@ class DirectoryReadTool(BaseTool): description: str = ( "A tool that can be used to recursively list a directory's content." ) - args_schema: Type[BaseModel] = DirectoryReadToolSchema - directory: Optional[str] = None + args_schema: type[BaseModel] = DirectoryReadToolSchema + directory: str | None = None - def __init__(self, directory: Optional[str] = None, **kwargs): + def __init__(self, directory: str | None = None, **kwargs): super().__init__(**kwargs) if directory is not None: self.directory = directory diff --git a/packages/tools/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py b/packages/tools/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py index 30fdd52cc..760c19d1c 100644 --- a/packages/tools/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/directory_search_tool/directory_search_tool.py @@ -1,11 +1,4 @@ -from typing import Optional, Type - -try: - from embedchain.loaders.directory_loader import DirectoryLoader - EMBEDCHAIN_AVAILABLE = True -except ImportError: - EMBEDCHAIN_AVAILABLE = False - +from crewai_tools.rag.data_types import DataType from pydantic import BaseModel, Field from ..rag.rag_tool import RagTool @@ -31,11 +24,9 @@ class DirectorySearchTool(RagTool): description: str = ( "A tool that can be used to semantic search a query from a directory's content." ) - args_schema: Type[BaseModel] = DirectorySearchToolSchema + args_schema: type[BaseModel] = DirectorySearchToolSchema - def __init__(self, directory: Optional[str] = None, **kwargs): - if not EMBEDCHAIN_AVAILABLE: - raise ImportError("embedchain is not installed. 
Please install it with `pip install crewai-tools[embedchain]`") + def __init__(self, directory: str | None = None, **kwargs): super().__init__(**kwargs) if directory is not None: self.add(directory) @@ -44,16 +35,17 @@ class DirectorySearchTool(RagTool): self._generate_description() def add(self, directory: str) -> None: - super().add( - directory, - loader=DirectoryLoader(config=dict(recursive=True)), - ) + super().add(directory, data_type=DataType.DIRECTORY) def _run( self, search_query: str, - directory: Optional[str] = None, + directory: str | None = None, + similarity_threshold: float | None = None, + limit: int | None = None, ) -> str: if directory is not None: self.add(directory) - return super()._run(query=search_query) + return super()._run( + query=search_query, similarity_threshold=similarity_threshold, limit=limit + ) diff --git a/packages/tools/src/crewai_tools/tools/docx_search_tool/docx_search_tool.py b/packages/tools/src/crewai_tools/tools/docx_search_tool/docx_search_tool.py index 97dab02cd..19b861fa6 100644 --- a/packages/tools/src/crewai_tools/tools/docx_search_tool/docx_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/docx_search_tool/docx_search_tool.py @@ -1,11 +1,6 @@ -from typing import Any, Optional, Type - -try: - from embedchain.models.data_type import DataType - EMBEDCHAIN_AVAILABLE = True -except ImportError: - EMBEDCHAIN_AVAILABLE = False +from typing import Any +from crewai_tools.rag.data_types import DataType from pydantic import BaseModel, Field from ..rag.rag_tool import RagTool @@ -14,7 +9,7 @@ from ..rag.rag_tool import RagTool class FixedDOCXSearchToolSchema(BaseModel): """Input for DOCXSearchTool.""" - docx: Optional[str] = Field( + docx: str | None = Field( ..., description="File path or URL of a DOCX file to be searched" ) search_query: str = Field( @@ -37,9 +32,9 @@ class DOCXSearchTool(RagTool): description: str = ( "A tool that can be used to semantic search a query from a DOCX's content." ) - args_schema: Type[BaseModel] = DOCXSearchToolSchema + args_schema: type[BaseModel] = DOCXSearchToolSchema - def __init__(self, docx: Optional[str] = None, **kwargs): + def __init__(self, docx: str | None = None, **kwargs): super().__init__(**kwargs) if docx is not None: self.add(docx) @@ -48,15 +43,17 @@ class DOCXSearchTool(RagTool): self._generate_description() def add(self, docx: str) -> None: - if not EMBEDCHAIN_AVAILABLE: - raise ImportError("embedchain is not installed. 
Please install it with `pip install crewai-tools[embedchain]`") super().add(docx, data_type=DataType.DOCX) def _run( self, search_query: str, - docx: Optional[str] = None, + docx: str | None = None, + similarity_threshold: float | None = None, + limit: int | None = None, ) -> Any: if docx is not None: self.add(docx) - return super()._run(query=search_query) + return super()._run( + query=search_query, similarity_threshold=similarity_threshold, limit=limit + ) diff --git a/packages/tools/src/crewai_tools/tools/exa_tools/exa_search_tool.py b/packages/tools/src/crewai_tools/tools/exa_tools/exa_search_tool.py index 332576039..22b0b7053 100644 --- a/packages/tools/src/crewai_tools/tools/exa_tools/exa_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/exa_tools/exa_search_tool.py @@ -1,5 +1,5 @@ import os -from typing import Any, List, Optional, Type +from typing import Any, Optional from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, Field @@ -17,13 +17,11 @@ class EXABaseToolSchema(BaseModel): search_query: str = Field( ..., description="Mandatory search query you want to use to search the internet" ) - start_published_date: Optional[str] = Field( + start_published_date: str | None = Field( None, description="Start date for the search" ) - end_published_date: Optional[str] = Field( - None, description="End date for the search" - ) - include_domains: Optional[list[str]] = Field( + end_published_date: str | None = Field(None, description="End date for the search") + include_domains: list[str] | None = Field( None, description="List of domains to include in the search" ) @@ -32,18 +30,18 @@ class EXASearchTool(BaseTool): model_config = {"arbitrary_types_allowed": True} name: str = "EXASearchTool" description: str = "Search the internet using Exa" - args_schema: Type[BaseModel] = EXABaseToolSchema + args_schema: type[BaseModel] = EXABaseToolSchema client: Optional["Exa"] = None - content: Optional[bool] = False - summary: Optional[bool] = False - type: Optional[str] = "auto" - package_dependencies: List[str] = ["exa_py"] - api_key: Optional[str] = Field( + content: bool | None = False + summary: bool | None = False + type: str | None = "auto" + package_dependencies: list[str] = ["exa_py"] + api_key: str | None = Field( default_factory=lambda: os.getenv("EXA_API_KEY"), description="API key for Exa services", json_schema_extra={"required": False}, ) - env_vars: List[EnvVar] = [ + env_vars: list[EnvVar] = [ EnvVar( name="EXA_API_KEY", description="API key for Exa services", required=False ), @@ -51,9 +49,9 @@ class EXASearchTool(BaseTool): def __init__( self, - content: Optional[bool] = False, - summary: Optional[bool] = False, - type: Optional[str] = "auto", + content: bool | None = False, + summary: bool | None = False, + type: str | None = "auto", **kwargs, ): super().__init__( @@ -81,9 +79,9 @@ class EXASearchTool(BaseTool): def _run( self, search_query: str, - start_published_date: Optional[str] = None, - end_published_date: Optional[str] = None, - include_domains: Optional[list[str]] = None, + start_published_date: str | None = None, + end_published_date: str | None = None, + include_domains: list[str] | None = None, ) -> Any: if self.client is None: raise ValueError("Client not initialized") diff --git a/packages/tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py b/packages/tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py index 4e04e3a7d..9d8637a4c 100644 --- a/packages/tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py +++ 
b/packages/tools/src/crewai_tools/tools/file_read_tool/file_read_tool.py @@ -1,4 +1,4 @@ -from typing import Any, Optional, Type +from typing import Any from crewai.tools import BaseTool from pydantic import BaseModel, Field @@ -8,8 +8,12 @@ class FileReadToolSchema(BaseModel): """Input for FileReadTool.""" file_path: str = Field(..., description="Mandatory file full path to read the file") - start_line: Optional[int] = Field(1, description="Line number to start reading from (1-indexed)") - line_count: Optional[int] = Field(None, description="Number of lines to read. If None, reads the entire file") + start_line: int | None = Field( + 1, description="Line number to start reading from (1-indexed)" + ) + line_count: int | None = Field( + None, description="Number of lines to read. If None, reads the entire file" + ) class FileReadTool(BaseTool): @@ -38,10 +42,10 @@ class FileReadTool(BaseTool): name: str = "Read a file's content" description: str = "A tool that reads the content of a file. To use this tool, provide a 'file_path' parameter with the path to the file you want to read. Optionally, provide 'start_line' to start reading from a specific line and 'line_count' to limit the number of lines read." - args_schema: Type[BaseModel] = FileReadToolSchema - file_path: Optional[str] = None + args_schema: type[BaseModel] = FileReadToolSchema + file_path: str | None = None - def __init__(self, file_path: Optional[str] = None, **kwargs: Any) -> None: + def __init__(self, file_path: str | None = None, **kwargs: Any) -> None: """Initialize the FileReadTool. Args: @@ -59,18 +63,16 @@ class FileReadTool(BaseTool): def _run( self, - file_path: Optional[str] = None, - start_line: Optional[int] = 1, - line_count: Optional[int] = None, + file_path: str | None = None, + start_line: int | None = 1, + line_count: int | None = None, ) -> str: file_path = file_path or self.file_path start_line = start_line or 1 line_count = line_count or None if file_path is None: - return ( - "Error: No file path provided. Please provide a file path either in the constructor or as an argument." - ) + return "Error: No file path provided. Please provide a file path either in the constructor or as an argument." try: with open(file_path, "r") as file: @@ -82,7 +84,8 @@ class FileReadTool(BaseTool): selected_lines = [ line for i, line in enumerate(file) - if i >= start_idx and (line_count is None or i < start_idx + line_count) + if i >= start_idx + and (line_count is None or i < start_idx + line_count) ] if not selected_lines and start_idx > 0: @@ -94,4 +97,4 @@ class FileReadTool(BaseTool): except PermissionError: return f"Error: Permission denied when trying to read file: {file_path}" except Exception as e: - return f"Error: Failed to read file {file_path}. {str(e)}" + return f"Error: Failed to read file {file_path}. 
{e!s}" diff --git a/packages/tools/src/crewai_tools/tools/file_writer_tool/file_writer_tool.py b/packages/tools/src/crewai_tools/tools/file_writer_tool/file_writer_tool.py index 8b9ca5225..33b43985d 100644 --- a/packages/tools/src/crewai_tools/tools/file_writer_tool/file_writer_tool.py +++ b/packages/tools/src/crewai_tools/tools/file_writer_tool/file_writer_tool.py @@ -1,5 +1,5 @@ import os -from typing import Any, Optional, Type +from typing import Any from crewai.tools import BaseTool from pydantic import BaseModel @@ -11,25 +11,22 @@ def strtobool(val) -> bool: val = val.lower() if val in ("y", "yes", "t", "true", "on", "1"): return True - elif val in ("n", "no", "f", "false", "off", "0"): + if val in ("n", "no", "f", "false", "off", "0"): return False - else: - raise ValueError(f"invalid value to cast to bool: {val!r}") + raise ValueError(f"invalid value to cast to bool: {val!r}") class FileWriterToolInput(BaseModel): filename: str - directory: Optional[str] = "./" + directory: str | None = "./" overwrite: str | bool = False content: str class FileWriterTool(BaseTool): name: str = "File Writer Tool" - description: str = ( - "A tool to write content to a specified file. Accepts filename, content, and optionally a directory path and overwrite flag as input." - ) - args_schema: Type[BaseModel] = FileWriterToolInput + description: str = "A tool to write content to a specified file. Accepts filename, content, and optionally a directory path and overwrite flag as input." + args_schema: type[BaseModel] = FileWriterToolInput def _run(self, **kwargs: Any) -> str: try: @@ -57,6 +54,6 @@ class FileWriterTool(BaseTool): f"File {filepath} already exists and overwrite option was not passed." ) except KeyError as e: - return f"An error occurred while accessing key: {str(e)}" + return f"An error occurred while accessing key: {e!s}" except Exception as e: - return f"An error occurred while writing to the file: {str(e)}" + return f"An error occurred while writing to the file: {e!s}" diff --git a/packages/tools/src/crewai_tools/tools/file_writer_tool/tests/test_file_writer_tool.py b/packages/tools/src/crewai_tools/tools/file_writer_tool/tests/test_file_writer_tool.py index d75ed30f2..d772fb8c1 100644 --- a/packages/tools/src/crewai_tools/tools/file_writer_tool/tests/test_file_writer_tool.py +++ b/packages/tools/src/crewai_tools/tools/file_writer_tool/tests/test_file_writer_tool.py @@ -3,7 +3,6 @@ import shutil import tempfile import pytest - from crewai_tools.tools.file_writer_tool.file_writer_tool import FileWriterTool diff --git a/packages/tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py b/packages/tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py index c86fd64e0..b254e85dd 100644 --- a/packages/tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py +++ b/packages/tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool.py @@ -1,17 +1,28 @@ import os -import zipfile import tarfile -from typing import Type, Optional -from pydantic import BaseModel, Field +import zipfile + from crewai.tools import BaseTool +from pydantic import BaseModel, Field class FileCompressorToolInput(BaseModel): """Input schema for FileCompressorTool.""" - input_path: str = Field(..., description="Path to the file or directory to compress.") - output_path: Optional[str] = Field(default=None, description="Optional output archive filename.") - overwrite: bool = Field(default=False, description="Whether to overwrite the archive if it already 
exists.") - format: str = Field(default="zip", description="Compression format ('zip', 'tar', 'tar.gz', 'tar.bz2', 'tar.xz').") + + input_path: str = Field( + ..., description="Path to the file or directory to compress." + ) + output_path: str | None = Field( + default=None, description="Optional output archive filename." + ) + overwrite: bool = Field( + default=False, + description="Whether to overwrite the archive if it already exists.", + ) + format: str = Field( + default="zip", + description="Compression format ('zip', 'tar', 'tar.gz', 'tar.bz2', 'tar.xz').", + ) class FileCompressorTool(BaseTool): @@ -20,58 +31,65 @@ class FileCompressorTool(BaseTool): "Compresses a file or directory into an archive (.zip currently supported). " "Useful for archiving logs, documents, or backups." ) - args_schema: Type[BaseModel] = FileCompressorToolInput + args_schema: type[BaseModel] = FileCompressorToolInput - - def _run(self, input_path: str, output_path: Optional[str] = None, overwrite: bool = False, format: str = "zip") -> str: - - if not os.path.exists(input_path): - return f"Input path '{input_path}' does not exist." - - if not output_path: - output_path = self._generate_output_path(input_path, format) - - FORMAT_EXTENSION = { - "zip": ".zip", - "tar": ".tar", - "tar.gz": ".tar.gz", - "tar.bz2": ".tar.bz2", - "tar.xz": ".tar.xz" - } - - if format not in FORMAT_EXTENSION: - return f"Compression format '{format}' is not supported. Allowed formats: {', '.join(FORMAT_EXTENSION.keys())}" - elif not output_path.endswith(FORMAT_EXTENSION[format]): - return f"Error: If '{format}' format is chosen, output file must have a '{FORMAT_EXTENSION[format]}' extension." - if not self._prepare_output(output_path, overwrite): - return f"Output '{output_path}' already exists and overwrite is set to False." + def _run( + self, + input_path: str, + output_path: str | None = None, + overwrite: bool = False, + format: str = "zip", + ) -> str: + if not os.path.exists(input_path): + return f"Input path '{input_path}' does not exist." - try: - format_compression = { - "zip": self._compress_zip, - "tar": self._compress_tar, - "tar.gz": self._compress_tar, - "tar.bz2": self._compress_tar, - "tar.xz": self._compress_tar - } - if format == "zip": - format_compression[format](input_path, output_path) - else: - format_compression[format](input_path, output_path, format) - - return f"Successfully compressed '{input_path}' into '{output_path}'" - except FileNotFoundError: - return f"Error: File not found at path: {input_path}" - except PermissionError: - return f"Error: Permission denied when accessing '{input_path}' or writing '{output_path}'" - except Exception as e: - return f"An unexpected error occurred during compression: {str(e)}" + if not output_path: + output_path = self._generate_output_path(input_path, format) + FORMAT_EXTENSION = { + "zip": ".zip", + "tar": ".tar", + "tar.gz": ".tar.gz", + "tar.bz2": ".tar.bz2", + "tar.xz": ".tar.xz", + } + + if format not in FORMAT_EXTENSION: + return f"Compression format '{format}' is not supported. Allowed formats: {', '.join(FORMAT_EXTENSION.keys())}" + if not output_path.endswith(FORMAT_EXTENSION[format]): + return f"Error: If '{format}' format is chosen, output file must have a '{FORMAT_EXTENSION[format]}' extension." + if not self._prepare_output(output_path, overwrite): + return ( + f"Output '{output_path}' already exists and overwrite is set to False." 
+ ) + + try: + format_compression = { + "zip": self._compress_zip, + "tar": self._compress_tar, + "tar.gz": self._compress_tar, + "tar.bz2": self._compress_tar, + "tar.xz": self._compress_tar, + } + if format == "zip": + format_compression[format](input_path, output_path) + else: + format_compression[format](input_path, output_path, format) + + return f"Successfully compressed '{input_path}' into '{output_path}'" + except FileNotFoundError: + return f"Error: File not found at path: {input_path}" + except PermissionError: + return f"Error: Permission denied when accessing '{input_path}' or writing '{output_path}'" + except Exception as e: + return f"An unexpected error occurred during compression: {e!s}" def _generate_output_path(self, input_path: str, format: str) -> str: """Generates output path based on input path and format.""" if os.path.isfile(input_path): - base_name = os.path.splitext(os.path.basename(input_path))[0] # Remove extension + base_name = os.path.splitext(os.path.basename(input_path))[ + 0 + ] # Remove extension else: base_name = os.path.basename(os.path.normpath(input_path)) # Directory name return os.path.join(os.getcwd(), f"{base_name}.{format}") @@ -87,7 +105,7 @@ class FileCompressorTool(BaseTool): def _compress_zip(self, input_path: str, output_path: str): """Compresses input into a zip archive.""" - with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zipf: + with zipfile.ZipFile(output_path, "w", zipfile.ZIP_DEFLATED) as zipf: if os.path.isfile(input_path): zipf.write(input_path, os.path.basename(input_path)) else: @@ -97,19 +115,18 @@ class FileCompressorTool(BaseTool): arcname = os.path.relpath(full_path, start=input_path) zipf.write(full_path, arcname) - def _compress_tar(self, input_path: str, output_path: str, format: str): """Compresses input into a tar archive with the given format.""" format_mode = { "tar": "w", "tar.gz": "w:gz", "tar.bz2": "w:bz2", - "tar.xz": "w:xz" + "tar.xz": "w:xz", } if format not in format_mode: raise ValueError(f"Unsupported tar format: {format}") - + mode = format_mode[format] with tarfile.open(output_path, mode) as tarf: diff --git a/packages/tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool_test2.py b/packages/tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool_test2.py index b30199842..9ed902552 100644 --- a/packages/tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool_test2.py +++ b/packages/tools/src/crewai_tools/tools/files_compressor_tool/files_compressor_tool_test2.py @@ -1,88 +1,126 @@ +from unittest.mock import patch -import os import pytest from crewai_tools.tools.files_compressor_tool import FileCompressorTool -from unittest.mock import patch, MagicMock + @pytest.fixture def tool(): return FileCompressorTool() + @patch("os.path.exists", return_value=False) def test_input_path_does_not_exist(mock_exists, tool): result = tool._run("nonexistent_path") assert "does not exist" in result + @patch("os.path.exists", return_value=True) @patch("os.getcwd", return_value="/mocked/cwd") @patch.object(FileCompressorTool, "_compress_zip") # Mock actual compression @patch.object(FileCompressorTool, "_prepare_output", return_value=True) -def test_generate_output_path_default(mock_prepare, mock_compress, mock_cwd, mock_exists, tool): +def test_generate_output_path_default( + mock_prepare, mock_compress, mock_cwd, mock_exists, tool +): result = tool._run(input_path="mydir", format="zip") assert "Successfully compressed" in result mock_compress.assert_called_once() 
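The tests in this file drive FileCompressorTool._run directly; as a quick reference for the reformatted signature, here is a minimal usage sketch (the paths and archive name are illustrative, not taken from this change):

from crewai_tools.tools.files_compressor_tool import FileCompressorTool

tool = FileCompressorTool()

# Archive a directory as tar.gz; the output extension must match the chosen format.
result = tool._run(
    input_path="logs",          # existing file or directory to compress
    output_path="logs.tar.gz",  # optional; defaults to <basename>.<format> in the CWD
    format="tar.gz",            # "zip", "tar", "tar.gz", "tar.bz2", or "tar.xz"
    overwrite=True,             # allow replacing an existing archive
)
print(result)  # e.g. "Successfully compressed 'logs' into 'logs.tar.gz'"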
+ @patch("os.path.exists", return_value=True) @patch.object(FileCompressorTool, "_compress_zip") @patch.object(FileCompressorTool, "_prepare_output", return_value=True) def test_zip_compression(mock_prepare, mock_compress, mock_exists, tool): - result = tool._run(input_path="some/path", output_path="archive.zip", format="zip", overwrite=True) + result = tool._run( + input_path="some/path", output_path="archive.zip", format="zip", overwrite=True + ) assert "Successfully compressed" in result mock_compress.assert_called_once() + @patch("os.path.exists", return_value=True) @patch.object(FileCompressorTool, "_compress_tar") @patch.object(FileCompressorTool, "_prepare_output", return_value=True) def test_tar_gz_compression(mock_prepare, mock_compress, mock_exists, tool): - result = tool._run(input_path="some/path", output_path="archive.tar.gz", format="tar.gz", overwrite=True) + result = tool._run( + input_path="some/path", + output_path="archive.tar.gz", + format="tar.gz", + overwrite=True, + ) assert "Successfully compressed" in result mock_compress.assert_called_once() + @pytest.mark.parametrize("format", ["tar", "tar.bz2", "tar.xz"]) @patch("os.path.exists", return_value=True) @patch.object(FileCompressorTool, "_compress_tar") @patch.object(FileCompressorTool, "_prepare_output", return_value=True) def test_other_tar_formats(mock_prepare, mock_compress, mock_exists, format, tool): - result = tool._run(input_path="path/to/input", output_path=f"archive.{format}", format=format, overwrite=True) + result = tool._run( + input_path="path/to/input", + output_path=f"archive.{format}", + format=format, + overwrite=True, + ) assert "Successfully compressed" in result mock_compress.assert_called_once() + @pytest.mark.parametrize("format", ["rar", "7z"]) -@patch("os.path.exists", return_value=True) #Ensure input_path exists +@patch("os.path.exists", return_value=True) # Ensure input_path exists def test_unsupported_format(_, tool, format): - result = tool._run(input_path="some/path", output_path=f"archive.{format}", format=format) + result = tool._run( + input_path="some/path", output_path=f"archive.{format}", format=format + ) assert "not supported" in result -@patch("os.path.exists", return_value=True) -def test_extension_mismatch(_ , tool): - result = tool._run(input_path="some/path", output_path="archive.zip", format="tar.gz") + +@patch("os.path.exists", return_value=True) +def test_extension_mismatch(_, tool): + result = tool._run( + input_path="some/path", output_path="archive.zip", format="tar.gz" + ) assert "must have a '.tar.gz' extension" in result + @patch("os.path.exists", return_value=True) @patch("os.path.isfile", return_value=True) @patch("os.path.exists", return_value=True) def test_existing_output_no_overwrite(_, __, ___, tool): - result = tool._run(input_path="some/path", output_path="archive.zip", format="zip", overwrite=False) + result = tool._run( + input_path="some/path", output_path="archive.zip", format="zip", overwrite=False + ) assert "overwrite is set to False" in result + @patch("os.path.exists", return_value=True) @patch("zipfile.ZipFile", side_effect=PermissionError) def test_permission_error(mock_zip, _, tool): - result = tool._run(input_path="file.txt", output_path="file.zip", format="zip", overwrite=True) + result = tool._run( + input_path="file.txt", output_path="file.zip", format="zip", overwrite=True + ) assert "Permission denied" in result + @patch("os.path.exists", return_value=True) @patch("zipfile.ZipFile", side_effect=FileNotFoundError) def 
test_file_not_found_during_zip(mock_zip, _, tool): - result = tool._run(input_path="file.txt", output_path="file.zip", format="zip", overwrite=True) + result = tool._run( + input_path="file.txt", output_path="file.zip", format="zip", overwrite=True + ) assert "File not found" in result + @patch("os.path.exists", return_value=True) @patch("zipfile.ZipFile", side_effect=Exception("Unexpected")) def test_general_exception_during_zip(mock_zip, _, tool): - result = tool._run(input_path="file.txt", output_path="file.zip", format="zip", overwrite=True) + result = tool._run( + input_path="file.txt", output_path="file.zip", format="zip", overwrite=True + ) assert "unexpected error" in result - + + # Test: Output directory is created when missing @patch("os.makedirs") @patch("os.path.exists", return_value=False) diff --git a/packages/tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py b/packages/tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py index 9c99fe8d4..19113b237 100644 --- a/packages/tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py +++ b/packages/tools/src/crewai_tools/tools/firecrawl_crawl_website_tool/firecrawl_crawl_website_tool.py @@ -1,4 +1,4 @@ -from typing import Any, Optional, Type, List, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Optional from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, ConfigDict, Field, PrivateAttr @@ -43,9 +43,9 @@ class FirecrawlCrawlWebsiteTool(BaseTool): ) name: str = "Firecrawl web crawl tool" description: str = "Crawl webpages using Firecrawl and return the contents" - args_schema: Type[BaseModel] = FirecrawlCrawlWebsiteToolSchema - api_key: Optional[str] = None - config: Optional[dict[str, Any]] = Field( + args_schema: type[BaseModel] = FirecrawlCrawlWebsiteToolSchema + api_key: str | None = None + config: dict[str, Any] | None = Field( default_factory=lambda: { "maxDepth": 2, "ignoreSitemap": True, @@ -60,12 +60,16 @@ class FirecrawlCrawlWebsiteTool(BaseTool): } ) _firecrawl: Optional["FirecrawlApp"] = PrivateAttr(None) - package_dependencies: List[str] = ["firecrawl-py"] - env_vars: List[EnvVar] = [ - EnvVar(name="FIRECRAWL_API_KEY", description="API key for Firecrawl services", required=True), + package_dependencies: list[str] = ["firecrawl-py"] + env_vars: list[EnvVar] = [ + EnvVar( + name="FIRECRAWL_API_KEY", + description="API key for Firecrawl services", + required=True, + ), ] - def __init__(self, api_key: Optional[str] = None, **kwargs): + def __init__(self, api_key: str | None = None, **kwargs): super().__init__(**kwargs) self.api_key = api_key self._initialize_firecrawl() diff --git a/packages/tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py b/packages/tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py index 816e40159..790cb0b5a 100644 --- a/packages/tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py +++ b/packages/tools/src/crewai_tools/tools/firecrawl_scrape_website_tool/firecrawl_scrape_website_tool.py @@ -1,4 +1,4 @@ -from typing import Any, Optional, Type, Dict, List, TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Optional from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, ConfigDict, Field, PrivateAttr @@ -41,9 +41,9 @@ class FirecrawlScrapeWebsiteTool(BaseTool): ) name: str = "Firecrawl web scrape tool" description: str = "Scrape 
webpages using Firecrawl and return the contents" - args_schema: Type[BaseModel] = FirecrawlScrapeWebsiteToolSchema - api_key: Optional[str] = None - config: Dict[str, Any] = Field( + args_schema: type[BaseModel] = FirecrawlScrapeWebsiteToolSchema + api_key: str | None = None + config: dict[str, Any] = Field( default_factory=lambda: { "formats": ["markdown"], "onlyMainContent": True, @@ -55,12 +55,16 @@ class FirecrawlScrapeWebsiteTool(BaseTool): ) _firecrawl: Optional["FirecrawlApp"] = PrivateAttr(None) - package_dependencies: List[str] = ["firecrawl-py"] - env_vars: List[EnvVar] = [ - EnvVar(name="FIRECRAWL_API_KEY", description="API key for Firecrawl services", required=True), + package_dependencies: list[str] = ["firecrawl-py"] + env_vars: list[EnvVar] = [ + EnvVar( + name="FIRECRAWL_API_KEY", + description="API key for Firecrawl services", + required=True, + ), ] - def __init__(self, api_key: Optional[str] = None, **kwargs): + def __init__(self, api_key: str | None = None, **kwargs): super().__init__(**kwargs) try: from firecrawl import FirecrawlApp # type: ignore diff --git a/packages/tools/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py b/packages/tools/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py index ba4d4c242..f166b8ef7 100644 --- a/packages/tools/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/firecrawl_search_tool/firecrawl_search_tool.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Any, Dict, Optional, Type, List +from typing import TYPE_CHECKING, Any, Optional from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, ConfigDict, Field, PrivateAttr @@ -36,17 +36,14 @@ class FirecrawlSearchTool(BaseTool): timeout (int): Timeout in milliseconds. Default: 60000 """ - model_config = ConfigDict( - arbitrary_types_allowed=True, validate_assignment=True, frozen=False - ) model_config = ConfigDict( arbitrary_types_allowed=True, validate_assignment=True, frozen=False ) name: str = "Firecrawl web search tool" description: str = "Search webpages using Firecrawl and return the results" - args_schema: Type[BaseModel] = FirecrawlSearchToolSchema - api_key: Optional[str] = None - config: Optional[dict[str, Any]] = Field( + args_schema: type[BaseModel] = FirecrawlSearchToolSchema + api_key: str | None = None + config: dict[str, Any] | None = Field( default_factory=lambda: { "limit": 5, "tbs": None, @@ -57,12 +54,16 @@ class FirecrawlSearchTool(BaseTool): } ) _firecrawl: Optional["FirecrawlApp"] = PrivateAttr(None) - package_dependencies: List[str] = ["firecrawl-py"] - env_vars: List[EnvVar] = [ - EnvVar(name="FIRECRAWL_API_KEY", description="API key for Firecrawl services", required=True), + package_dependencies: list[str] = ["firecrawl-py"] + env_vars: list[EnvVar] = [ + EnvVar( + name="FIRECRAWL_API_KEY", + description="API key for Firecrawl services", + required=True, + ), ] - def __init__(self, api_key: Optional[str] = None, **kwargs): + def __init__(self, api_key: str | None = None, **kwargs): super().__init__(**kwargs) self.api_key = api_key self._initialize_firecrawl() @@ -116,4 +117,3 @@ except ImportError: """ When this tool is not used, then exception can be ignored. 
""" - pass diff --git a/packages/tools/src/crewai_tools/tools/generate_crewai_automation_tool/generate_crewai_automation_tool.py b/packages/tools/src/crewai_tools/tools/generate_crewai_automation_tool/generate_crewai_automation_tool.py index 3d52ae3fa..b9b7bb401 100644 --- a/packages/tools/src/crewai_tools/tools/generate_crewai_automation_tool/generate_crewai_automation_tool.py +++ b/packages/tools/src/crewai_tools/tools/generate_crewai_automation_tool/generate_crewai_automation_tool.py @@ -1,5 +1,4 @@ import os -from typing import List, Optional, Type import requests from crewai.tools import BaseTool, EnvVar @@ -10,7 +9,7 @@ class GenerateCrewaiAutomationToolSchema(BaseModel): prompt: str = Field( description="The prompt to generate the CrewAI automation, e.g. 'Generate a CrewAI automation that will scrape the website and store the data in a database.'" ) - organization_id: Optional[str] = Field( + organization_id: str | None = Field( default=None, description="The identifier for the CrewAI Enterprise organization. If not specified, a default organization will be used.", ) @@ -23,16 +22,16 @@ class GenerateCrewaiAutomationTool(BaseTool): "automations based on natural language descriptions. It translates high-level requirements into " "functional CrewAI implementations." ) - args_schema: Type[BaseModel] = GenerateCrewaiAutomationToolSchema + args_schema: type[BaseModel] = GenerateCrewaiAutomationToolSchema crewai_enterprise_url: str = Field( default_factory=lambda: os.getenv("CREWAI_PLUS_URL", "https://app.crewai.com"), description="The base URL of CrewAI Enterprise. If not provided, it will be loaded from the environment variable CREWAI_PLUS_URL with default https://app.crewai.com.", ) - personal_access_token: Optional[str] = Field( + personal_access_token: str | None = Field( default_factory=lambda: os.getenv("CREWAI_PERSONAL_ACCESS_TOKEN"), description="The user's Personal Access Token to access CrewAI Enterprise API. 
If not provided, it will be loaded from the environment variable CREWAI_PERSONAL_ACCESS_TOKEN.", ) - env_vars: List[EnvVar] = [ + env_vars: list[EnvVar] = [ EnvVar( name="CREWAI_PERSONAL_ACCESS_TOKEN", description="Personal Access Token for CrewAI Enterprise API", @@ -57,7 +56,7 @@ class GenerateCrewaiAutomationTool(BaseTool): studio_project_url = response.json().get("url") return f"Generated CrewAI Studio project URL: {studio_project_url}" - def _get_headers(self, organization_id: Optional[str] = None) -> dict: + def _get_headers(self, organization_id: str | None = None) -> dict: headers = { "Authorization": f"Bearer {self.personal_access_token}", "Content-Type": "application/json", diff --git a/packages/tools/src/crewai_tools/tools/github_search_tool/github_search_tool.py b/packages/tools/src/crewai_tools/tools/github_search_tool/github_search_tool.py index afde4fe92..36180333e 100644 --- a/packages/tools/src/crewai_tools/tools/github_search_tool/github_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/github_search_tool/github_search_tool.py @@ -1,12 +1,5 @@ -from typing import List, Optional, Type, Any - -try: - from embedchain.loaders.github import GithubLoader - EMBEDCHAIN_AVAILABLE = True -except ImportError: - EMBEDCHAIN_AVAILABLE = False - -from pydantic import BaseModel, Field, PrivateAttr +from crewai_tools.rag.data_types import DataType +from pydantic import BaseModel, Field from ..rag.rag_tool import RagTool @@ -24,7 +17,7 @@ class GithubSearchToolSchema(FixedGithubSearchToolSchema): """Input for GithubSearchTool.""" github_repo: str = Field(..., description="Mandatory github you want to search") - content_types: List[str] = Field( + content_types: list[str] = Field( ..., description="Mandatory content types you want to be included search, options: [code, repo, pr, issue]", ) @@ -32,28 +25,22 @@ class GithubSearchToolSchema(FixedGithubSearchToolSchema): class GithubSearchTool(RagTool): name: str = "Search a github repo's content" - description: str = ( - "A tool that can be used to semantic search a query from a github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities." - ) + description: str = "A tool that can be used to semantic search a query from a github repo's content. This is not the GitHub API, but instead a tool that can provide semantic search capabilities." summarize: bool = False gh_token: str - args_schema: Type[BaseModel] = GithubSearchToolSchema - content_types: List[str] = Field( + args_schema: type[BaseModel] = GithubSearchToolSchema + content_types: list[str] = Field( default_factory=lambda: ["code", "repo", "pr", "issue"], description="Content types you want to be included search, options: [code, repo, pr, issue]", ) - _loader: Any | None = PrivateAttr(default=None) def __init__( self, - github_repo: Optional[str] = None, - content_types: Optional[List[str]] = None, + github_repo: str | None = None, + content_types: list[str] | None = None, **kwargs, ): - if not EMBEDCHAIN_AVAILABLE: - raise ImportError("embedchain is not installed. 
Please install it with `pip install crewai-tools[embedchain]`") super().__init__(**kwargs) - self._loader = GithubLoader(config={"token": self.gh_token}) if github_repo and content_types: self.add(repo=github_repo, content_types=content_types) @@ -64,25 +51,28 @@ class GithubSearchTool(RagTool): def add( self, repo: str, - content_types: Optional[List[str]] = None, + content_types: list[str] | None = None, ) -> None: content_types = content_types or self.content_types - super().add( - f"repo:{repo} type:{','.join(content_types)}", - data_type="github", - loader=self._loader, + f"https://github.com/{repo}", + data_type=DataType.GITHUB, + metadata={"content_types": content_types, "gh_token": self.gh_token}, ) def _run( self, search_query: str, - github_repo: Optional[str] = None, - content_types: Optional[List[str]] = None, + github_repo: str | None = None, + content_types: list[str] | None = None, + similarity_threshold: float | None = None, + limit: int | None = None, ) -> str: if github_repo: self.add( repo=github_repo, content_types=content_types, ) - return super()._run(query=search_query) + return super()._run( + query=search_query, similarity_threshold=similarity_threshold, limit=limit + ) diff --git a/packages/tools/src/crewai_tools/tools/hyperbrowser_load_tool/hyperbrowser_load_tool.py b/packages/tools/src/crewai_tools/tools/hyperbrowser_load_tool/hyperbrowser_load_tool.py index a2571b94b..3ef120954 100644 --- a/packages/tools/src/crewai_tools/tools/hyperbrowser_load_tool/hyperbrowser_load_tool.py +++ b/packages/tools/src/crewai_tools/tools/hyperbrowser_load_tool/hyperbrowser_load_tool.py @@ -1,5 +1,5 @@ import os -from typing import Any, Optional, Type, Dict, Literal, Union, List +from typing import Any, Literal from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, Field @@ -7,8 +7,13 @@ from pydantic import BaseModel, Field class HyperbrowserLoadToolSchema(BaseModel): url: str = Field(description="Website URL") - operation: Literal['scrape', 'crawl'] = Field(description="Operation to perform on the website. Either 'scrape' or 'crawl'") - params: Optional[Dict] = Field(description="Optional params for scrape or crawl. For more information on the supported params, visit https://docs.hyperbrowser.ai/reference/sdks/python/scrape#start-scrape-job-and-wait or https://docs.hyperbrowser.ai/reference/sdks/python/crawl#start-crawl-job-and-wait") + operation: Literal["scrape", "crawl"] = Field( + description="Operation to perform on the website. Either 'scrape' or 'crawl'" + ) + params: dict | None = Field( + description="Optional params for scrape or crawl. For more information on the supported params, visit https://docs.hyperbrowser.ai/reference/sdks/python/scrape#start-scrape-job-and-wait or https://docs.hyperbrowser.ai/reference/sdks/python/crawl#start-crawl-job-and-wait" + ) + class HyperbrowserLoadTool(BaseTool): """HyperbrowserLoadTool. 
@@ -20,19 +25,24 @@ class HyperbrowserLoadTool(BaseTool): Args: api_key: The Hyperbrowser API key, can be set as an environment variable `HYPERBROWSER_API_KEY` or passed directly """ + name: str = "Hyperbrowser web load tool" description: str = "Scrape or crawl a website using Hyperbrowser and return the contents in properly formatted markdown or html" - args_schema: Type[BaseModel] = HyperbrowserLoadToolSchema - api_key: Optional[str] = None - hyperbrowser: Optional[Any] = None - package_dependencies: List[str] = ["hyperbrowser"] - env_vars: List[EnvVar] = [ - EnvVar(name="HYPERBROWSER_API_KEY", description="API key for Hyperbrowser services", required=False), + args_schema: type[BaseModel] = HyperbrowserLoadToolSchema + api_key: str | None = None + hyperbrowser: Any | None = None + package_dependencies: list[str] = ["hyperbrowser"] + env_vars: list[EnvVar] = [ + EnvVar( + name="HYPERBROWSER_API_KEY", + description="API key for Hyperbrowser services", + required=False, + ), ] - def __init__(self, api_key: Optional[str] = None, **kwargs): + def __init__(self, api_key: str | None = None, **kwargs): super().__init__(**kwargs) - self.api_key = api_key or os.getenv('HYPERBROWSER_API_KEY') + self.api_key = api_key or os.getenv("HYPERBROWSER_API_KEY") if not api_key: raise ValueError( "`api_key` is required, please set the `HYPERBROWSER_API_KEY` environment variable or pass it directly" @@ -41,18 +51,22 @@ class HyperbrowserLoadTool(BaseTool): try: from hyperbrowser import Hyperbrowser except ImportError: - raise ImportError("`hyperbrowser` package not found, please run `pip install hyperbrowser`") + raise ImportError( + "`hyperbrowser` package not found, please run `pip install hyperbrowser`" + ) if not self.api_key: - raise ValueError("HYPERBROWSER_API_KEY is not set. Please provide it either via the constructor with the `api_key` argument or by setting the HYPERBROWSER_API_KEY environment variable.") + raise ValueError( + "HYPERBROWSER_API_KEY is not set. Please provide it either via the constructor with the `api_key` argument or by setting the HYPERBROWSER_API_KEY environment variable." 
+ ) self.hyperbrowser = Hyperbrowser(api_key=self.api_key) - def _prepare_params(self, params: Dict) -> Dict: + def _prepare_params(self, params: dict) -> dict: """Prepare session and scrape options parameters.""" try: - from hyperbrowser.models.session import CreateSessionParams from hyperbrowser.models.scrape import ScrapeOptions + from hyperbrowser.models.session import CreateSessionParams except ImportError: raise ImportError( "`hyperbrowser` package not found, please run `pip install hyperbrowser`" @@ -70,17 +84,24 @@ class HyperbrowserLoadTool(BaseTool): params["scrape_options"] = ScrapeOptions(**params["scrape_options"]) return params - def _extract_content(self, data: Union[Any, None]): + def _extract_content(self, data: Any | None): """Extract content from response data.""" content = "" if data: content = data.markdown or data.html or "" return content - def _run(self, url: str, operation: Literal['scrape', 'crawl'] = 'scrape', params: Optional[Dict] = {}): + def _run( + self, + url: str, + operation: Literal["scrape", "crawl"] = "scrape", + params: dict | None = None, + ): + if params is None: + params = {} try: - from hyperbrowser.models.scrape import StartScrapeJobParams from hyperbrowser.models.crawl import StartCrawlJobParams + from hyperbrowser.models.scrape import StartScrapeJobParams except ImportError: raise ImportError( "`hyperbrowser` package not found, please run `pip install hyperbrowser`" @@ -88,20 +109,18 @@ class HyperbrowserLoadTool(BaseTool): params = self._prepare_params(params) - if operation == 'scrape': + if operation == "scrape": scrape_params = StartScrapeJobParams(url=url, **params) scrape_resp = self.hyperbrowser.scrape.start_and_wait(scrape_params) - content = self._extract_content(scrape_resp.data) - return content - else: - crawl_params = StartCrawlJobParams(url=url, **params) - crawl_resp = self.hyperbrowser.crawl.start_and_wait(crawl_params) - content = "" - if crawl_resp.data: - for page in crawl_resp.data: - page_content = self._extract_content(page) - if page_content: - content += ( - f"\n{'-'*50}\nUrl: {page.url}\nContent:\n{page_content}\n" - ) - return content + return self._extract_content(scrape_resp.data) + crawl_params = StartCrawlJobParams(url=url, **params) + crawl_resp = self.hyperbrowser.crawl.start_and_wait(crawl_params) + content = "" + if crawl_resp.data: + for page in crawl_resp.data: + page_content = self._extract_content(page) + if page_content: + content += ( + f"\n{'-' * 50}\nUrl: {page.url}\nContent:\n{page_content}\n" + ) + return content diff --git a/packages/tools/src/crewai_tools/tools/invoke_crewai_automation_tool/invoke_crewai_automation_tool.py b/packages/tools/src/crewai_tools/tools/invoke_crewai_automation_tool/invoke_crewai_automation_tool.py index 09b076cc1..50db8acaf 100644 --- a/packages/tools/src/crewai_tools/tools/invoke_crewai_automation_tool/invoke_crewai_automation_tool.py +++ b/packages/tools/src/crewai_tools/tools/invoke_crewai_automation_tool/invoke_crewai_automation_tool.py @@ -1,23 +1,27 @@ +import time +from typing import Any + +import requests from crewai.tools import BaseTool from pydantic import BaseModel, Field, create_model -from typing import Any, Type -import requests -import time + class InvokeCrewAIAutomationInput(BaseModel): """Input schema for InvokeCrewAIAutomationTool.""" + prompt: str = Field(..., description="The prompt or query to send to the crew") + class InvokeCrewAIAutomationTool(BaseTool): """ A CrewAI tool for invoking external crew/flows APIs. 
- + This tool provides CrewAI Platform API integration with external crew services, supporting: - Dynamic input schema configuration - Automatic polling for task completion - Bearer token authentication - Comprehensive error handling - + Example: Basic usage: >>> tool = InvokeCrewAIAutomationTool( @@ -26,7 +30,7 @@ class InvokeCrewAIAutomationTool(BaseTool): ... crew_name="My Crew", ... crew_description="Description of what the crew does" ... ) - + With custom inputs: >>> custom_inputs = { ... "param1": Field(..., description="Description of param1"), @@ -39,7 +43,7 @@ class InvokeCrewAIAutomationTool(BaseTool): ... crew_description="Description of what the crew does", ... crew_inputs=custom_inputs ... ) - + Example: >>> tools=[ ... InvokeCrewAIAutomationTool( @@ -53,25 +57,27 @@ class InvokeCrewAIAutomationTool(BaseTool): ... ) ... ] """ + name: str = "invoke_amp_automation" description: str = "Invokes an CrewAI Platform Automation using API" - args_schema: Type[BaseModel] = InvokeCrewAIAutomationInput - + args_schema: type[BaseModel] = InvokeCrewAIAutomationInput + crew_api_url: str crew_bearer_token: str - max_polling_time: int = 10 * 60 # 10 minutes - + max_polling_time: int = 10 * 60 # 10 minutes + def __init__( - self, - crew_api_url: str, - crew_bearer_token: str, + self, + crew_api_url: str, + crew_bearer_token: str, crew_name: str, crew_description: str, max_polling_time: int = 10 * 60, - crew_inputs: dict[str, Any] = None): + crew_inputs: dict[str, Any] | None = None, + ): """ Initialize the InvokeCrewAIAutomationTool. - + Args: crew_api_url: Base URL of the crew API service crew_bearer_token: Bearer token for API authentication @@ -84,7 +90,7 @@ class InvokeCrewAIAutomationTool(BaseTool): if crew_inputs: # Start with the base prompt field fields = {} - + # Add custom fields for field_name, field_def in crew_inputs.items(): if isinstance(field_def, tuple): @@ -92,12 +98,12 @@ class InvokeCrewAIAutomationTool(BaseTool): else: # Assume it's a Field object, extract type from annotation if available fields[field_name] = (str, field_def) - + # Create dynamic model - args_schema = create_model('DynamicInvokeCrewAIAutomationInput', **fields) + args_schema = create_model("DynamicInvokeCrewAIAutomationInput", **fields) else: args_schema = InvokeCrewAIAutomationInput - + # Initialize the parent class with proper field values super().__init__( name=crew_name, @@ -105,7 +111,7 @@ class InvokeCrewAIAutomationTool(BaseTool): args_schema=args_schema, crew_api_url=crew_api_url, crew_bearer_token=crew_bearer_token, - max_polling_time=max_polling_time + max_polling_time=max_polling_time, ) def _kickoff_crew(self, inputs: dict[str, Any]) -> dict[str, Any]: @@ -125,8 +131,7 @@ class InvokeCrewAIAutomationTool(BaseTool): }, json={"inputs": inputs}, ) - response_json = response.json() - return response_json + return response.json() def _get_crew_status(self, crew_id: str) -> dict[str, Any]: """Get the status of a crew task @@ -150,27 +155,27 @@ class InvokeCrewAIAutomationTool(BaseTool): """Execute the crew invocation tool.""" if kwargs is None: kwargs = {} - + # Start the crew response = self._kickoff_crew(inputs=kwargs) - + if response.get("kickoff_id") is None: return f"Error: Failed to kickoff crew. 
Response: {response}" kickoff_id = response.get("kickoff_id") - + # Poll for completion for i in range(self.max_polling_time): try: status_response = self._get_crew_status(crew_id=kickoff_id) if status_response.get("state", "").lower() == "success": return status_response.get("result", "No result returned") - elif status_response.get("state", "").lower() == "failed": + if status_response.get("state", "").lower() == "failed": return f"Error: Crew task failed. Response: {status_response}" except Exception as e: if i == self.max_polling_time - 1: # Last attempt return f"Error: Failed to get crew status after {self.max_polling_time} attempts. Last error: {e}" - + time.sleep(1) - + return f"Error: Crew did not complete within {self.max_polling_time} seconds" diff --git a/packages/tools/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py b/packages/tools/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py index 86f771cd0..a0e585f63 100644 --- a/packages/tools/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py +++ b/packages/tools/src/crewai_tools/tools/jina_scrape_website_tool/jina_scrape_website_tool.py @@ -1,5 +1,3 @@ -from typing import Optional, Type - import requests from crewai.tools import BaseTool from pydantic import BaseModel, Field @@ -14,16 +12,16 @@ class JinaScrapeWebsiteToolInput(BaseModel): class JinaScrapeWebsiteTool(BaseTool): name: str = "JinaScrapeWebsiteTool" description: str = "A tool that can be used to read a website content using Jina.ai reader and return markdown content." - args_schema: Type[BaseModel] = JinaScrapeWebsiteToolInput - website_url: Optional[str] = None - api_key: Optional[str] = None + args_schema: type[BaseModel] = JinaScrapeWebsiteToolInput + website_url: str | None = None + api_key: str | None = None headers: dict = {} def __init__( self, - website_url: Optional[str] = None, - api_key: Optional[str] = None, - custom_headers: Optional[dict] = None, + website_url: str | None = None, + api_key: str | None = None, + custom_headers: dict | None = None, **kwargs, ): super().__init__(**kwargs) @@ -38,7 +36,7 @@ class JinaScrapeWebsiteTool(BaseTool): if api_key is not None: self.headers["Authorization"] = f"Bearer {api_key}" - def _run(self, website_url: Optional[str] = None) -> str: + def _run(self, website_url: str | None = None) -> str: url = website_url or self.website_url if not url: raise ValueError( diff --git a/packages/tools/src/crewai_tools/tools/json_search_tool/json_search_tool.py b/packages/tools/src/crewai_tools/tools/json_search_tool/json_search_tool.py index 820323eec..49e5e4ffb 100644 --- a/packages/tools/src/crewai_tools/tools/json_search_tool/json_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/json_search_tool/json_search_tool.py @@ -1,5 +1,3 @@ -from typing import Optional, Type - from pydantic import BaseModel, Field from ..rag.rag_tool import RagTool @@ -27,9 +25,9 @@ class JSONSearchTool(RagTool): description: str = ( "A tool that can be used to semantic search a query from a JSON's content." 
) - args_schema: Type[BaseModel] = JSONSearchToolSchema + args_schema: type[BaseModel] = JSONSearchToolSchema - def __init__(self, json_path: Optional[str] = None, **kwargs): + def __init__(self, json_path: str | None = None, **kwargs): super().__init__(**kwargs) if json_path is not None: self.add(json_path) @@ -40,8 +38,12 @@ class JSONSearchTool(RagTool): def _run( self, search_query: str, - json_path: Optional[str] = None, + json_path: str | None = None, + similarity_threshold: float | None = None, + limit: int | None = None, ) -> str: if json_path is not None: self.add(json_path) - return super()._run(query=search_query) + return super()._run( + query=search_query, similarity_threshold=similarity_threshold, limit=limit + ) diff --git a/packages/tools/src/crewai_tools/tools/linkup/linkup_search_tool.py b/packages/tools/src/crewai_tools/tools/linkup/linkup_search_tool.py index 634ba2863..742aef1ed 100644 --- a/packages/tools/src/crewai_tools/tools/linkup/linkup_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/linkup/linkup_search_tool.py @@ -1,5 +1,5 @@ import os -from typing import Any, List +from typing import Any from crewai.tools import BaseTool, EnvVar @@ -20,12 +20,8 @@ class LinkupSearchTool(BaseTool): "Performs an API call to Linkup to retrieve contextual information." ) _client: LinkupClient = PrivateAttr() # type: ignore - description: str = ( - "Performs an API call to Linkup to retrieve contextual information." - ) - _client: LinkupClient = PrivateAttr() # type: ignore - package_dependencies: List[str] = ["linkup-sdk"] - env_vars: List[EnvVar] = [ + package_dependencies: list[str] = ["linkup-sdk"] + env_vars: list[EnvVar] = [ EnvVar(name="LINKUP_API_KEY", description="API key for Linkup", required=True), ] diff --git a/packages/tools/src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py b/packages/tools/src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py index ba2605816..6bf4b9796 100644 --- a/packages/tools/src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py +++ b/packages/tools/src/crewai_tools/tools/llamaindex_tool/llamaindex_tool.py @@ -1,4 +1,4 @@ -from typing import Any, Optional, Type, cast +from typing import Any, cast from crewai.tools import BaseTool from pydantic import BaseModel, Field @@ -36,7 +36,7 @@ class LlamaIndexTool(BaseTool): raise ValueError( "The LlamaIndex tool does not have an fn_schema specified." 
) - args_schema = cast(Type[BaseModel], tool.metadata.fn_schema) + args_schema = cast(type[BaseModel], tool.metadata.fn_schema) return cls( name=tool.metadata.name, @@ -50,8 +50,8 @@ class LlamaIndexTool(BaseTool): def from_query_engine( cls, query_engine: Any, - name: Optional[str] = None, - description: Optional[str] = None, + name: str | None = None, + description: str | None = None, return_direct: bool = False, **kwargs: Any, ) -> "LlamaIndexTool": diff --git a/packages/tools/src/crewai_tools/tools/mdx_search_tool/mdx_search_tool.py b/packages/tools/src/crewai_tools/tools/mdx_search_tool/mdx_search_tool.py index 807da62fe..4ca888524 100644 --- a/packages/tools/src/crewai_tools/tools/mdx_search_tool/mdx_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/mdx_search_tool/mdx_search_tool.py @@ -1,13 +1,6 @@ -from typing import Optional, Type - +from crewai_tools.rag.data_types import DataType from pydantic import BaseModel, Field -try: - from embedchain.models.data_type import DataType - EMBEDCHAIN_AVAILABLE = True -except ImportError: - EMBEDCHAIN_AVAILABLE = False - from ..rag.rag_tool import RagTool @@ -31,9 +24,9 @@ class MDXSearchTool(RagTool): description: str = ( "A tool that can be used to semantic search a query from a MDX's content." ) - args_schema: Type[BaseModel] = MDXSearchToolSchema + args_schema: type[BaseModel] = MDXSearchToolSchema - def __init__(self, mdx: Optional[str] = None, **kwargs): + def __init__(self, mdx: str | None = None, **kwargs): super().__init__(**kwargs) if mdx is not None: self.add(mdx) @@ -42,15 +35,17 @@ class MDXSearchTool(RagTool): self._generate_description() def add(self, mdx: str) -> None: - if not EMBEDCHAIN_AVAILABLE: - raise ImportError("embedchain is not installed. Please install it with `pip install crewai-tools[embedchain]`") super().add(mdx, data_type=DataType.MDX) def _run( self, search_query: str, - mdx: Optional[str] = None, + mdx: str | None = None, + similarity_threshold: float | None = None, + limit: int | None = None, ) -> str: if mdx is not None: self.add(mdx) - return super()._run(query=search_query) + return super()._run( + query=search_query, similarity_threshold=similarity_threshold, limit=limit + ) diff --git a/packages/tools/src/crewai_tools/tools/mongodb_vector_search_tool/__init__.py b/packages/tools/src/crewai_tools/tools/mongodb_vector_search_tool/__init__.py index c7e991472..b07a05e66 100644 --- a/packages/tools/src/crewai_tools/tools/mongodb_vector_search_tool/__init__.py +++ b/packages/tools/src/crewai_tools/tools/mongodb_vector_search_tool/__init__.py @@ -5,7 +5,7 @@ from .vector_search import ( ) __all__ = [ + "MongoDBToolSchema", "MongoDBVectorSearchConfig", "MongoDBVectorSearchTool", - "MongoDBToolSchema", ] diff --git a/packages/tools/src/crewai_tools/tools/mongodb_vector_search_tool/utils.py b/packages/tools/src/crewai_tools/tools/mongodb_vector_search_tool/utils.py index a66586f6f..63e6463c0 100644 --- a/packages/tools/src/crewai_tools/tools/mongodb_vector_search_tool/utils.py +++ b/packages/tools/src/crewai_tools/tools/mongodb_vector_search_tool/utils.py @@ -1,7 +1,8 @@ from __future__ import annotations +from collections.abc import Callable from time import monotonic, sleep -from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional +from typing import TYPE_CHECKING, Any if TYPE_CHECKING: from pymongo.collection import Collection @@ -11,9 +12,9 @@ def _vector_search_index_definition( dimensions: int, path: str, similarity: str, - filters: Optional[List[str]] = None, + filters: list[str] | 
None = None, **kwargs: Any, -) -> Dict[str, Any]: +) -> dict[str, Any]: # https://www.mongodb.com/docs/atlas/atlas-vector-search/vector-search-type/ fields = [ { @@ -37,9 +38,9 @@ def create_vector_search_index( dimensions: int, path: str, similarity: str, - filters: Optional[List[str]] = None, + filters: list[str] | None = None, *, - wait_until_complete: Optional[float] = None, + wait_until_complete: float | None = None, **kwargs: Any, ) -> None: """Experimental Utility function to create a vector search index @@ -60,7 +61,7 @@ def create_vector_search_index( if collection.name not in collection.database.list_collection_names(): collection.database.create_collection(collection.name) - result = collection.create_search_index( + collection.create_search_index( SearchIndexModel( definition=_vector_search_index_definition( dimensions=dimensions, diff --git a/packages/tools/src/crewai_tools/tools/mongodb_vector_search_tool/vector_search.py b/packages/tools/src/crewai_tools/tools/mongodb_vector_search_tool/vector_search.py index 4112aa500..e09a13138 100644 --- a/packages/tools/src/crewai_tools/tools/mongodb_vector_search_tool/vector_search.py +++ b/packages/tools/src/crewai_tools/tools/mongodb_vector_search_tool/vector_search.py @@ -1,7 +1,8 @@ import os +from collections.abc import Iterable from importlib.metadata import version from logging import getLogger -from typing import Any, Dict, Iterable, List, Optional, Type +from typing import Any from crewai.tools import BaseTool, EnvVar from openai import AzureOpenAI, Client @@ -12,7 +13,7 @@ from crewai_tools.tools.mongodb_vector_search_tool.utils import ( ) try: - import pymongo # noqa: F403 + import pymongo MONGODB_AVAILABLE = True except ImportError: @@ -24,14 +25,12 @@ logger = getLogger(__name__) class MongoDBVectorSearchConfig(BaseModel): """Configuration for MongoDB vector search queries.""" - limit: Optional[int] = Field( - default=4, description="number of documents to return." - ) - pre_filter: Optional[dict[str, Any]] = Field( + limit: int | None = Field(default=4, description="number of documents to return.") + pre_filter: dict[str, Any] | None = Field( default=None, description="List of MQL match expressions comparing an indexed field", ) - post_filter_pipeline: Optional[list[dict]] = Field( + post_filter_pipeline: list[dict] | None = Field( default=None, description="Pipeline of MongoDB aggregation stages to filter/process results after $vectorSearch.", ) @@ -60,8 +59,8 @@ class MongoDBVectorSearchTool(BaseTool): name: str = "MongoDBVectorSearchTool" description: str = "A tool to perfrom a vector search on a MongoDB database for relevant information on internal documents." 
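For context on the add_texts signature reformatted further down in this file, a minimal ingestion sketch; the constructor field names used here (connection_string, database_name, collection_name) are assumptions not shown in this hunk, and the sample texts and metadata are illustrative:

from crewai_tools.tools.mongodb_vector_search_tool import MongoDBVectorSearchTool

# Constructor field names below are assumed, not taken from this diff.
tool = MongoDBVectorSearchTool(
    connection_string="mongodb+srv://user:pass@cluster.example.net",
    database_name="docs",
    collection_name="chunks",
)

# add_texts embeds each text with the configured embedding model and upserts
# the batch into the collection, returning the document ids it wrote.
ids = tool.add_texts(
    texts=[
        "CrewAI supports pluggable vector stores.",
        "Atlas Vector Search indexes embeddings.",
    ],
    metadatas=[{"source": "notes"}, {"source": "notes"}],
    batch_size=100,
)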
- args_schema: Type[BaseModel] = MongoDBToolSchema - query_config: Optional[MongoDBVectorSearchConfig] = Field( + args_schema: type[BaseModel] = MongoDBToolSchema + query_config: MongoDBVectorSearchConfig | None = Field( default=None, description="MongoDB Vector Search query configuration" ) embedding_model: str = Field( @@ -89,7 +88,7 @@ class MongoDBVectorSearchTool(BaseTool): default=1536, description="Number of dimensions in the embedding vector", ) - env_vars: List[EnvVar] = [ + env_vars: list[EnvVar] = [ EnvVar( name="BROWSERBASE_API_KEY", description="API key for Browserbase services", @@ -101,7 +100,7 @@ class MongoDBVectorSearchTool(BaseTool): required=False, ), ] - package_dependencies: List[str] = ["mongdb"] + package_dependencies: list[str] = ["mongdb"] def __init__(self, **kwargs): super().__init__(**kwargs) @@ -166,11 +165,11 @@ class MongoDBVectorSearchTool(BaseTool): def add_texts( self, texts: Iterable[str], - metadatas: Optional[List[Dict[str, Any]]] = None, - ids: Optional[List[str]] = None, + metadatas: list[dict[str, Any]] | None = None, + ids: list[str] | None = None, batch_size: int = 100, **kwargs: Any, - ) -> List[str]: + ) -> list[str]: """Add texts, create embeddings, and add to the Collection and index. Important notes on ids: @@ -203,7 +202,7 @@ class MongoDBVectorSearchTool(BaseTool): metadatas_batch = [] size = 0 i = 0 - for j, (text, metadata) in enumerate(zip(texts, _metadatas)): + for j, (text, metadata) in enumerate(zip(texts, _metadatas, strict=False)): size += len(text) + len(metadata) texts_batch.append(text) metadatas_batch.append(metadata) @@ -223,7 +222,7 @@ class MongoDBVectorSearchTool(BaseTool): result_ids.extend(batch_res) return result_ids - def _embed_texts(self, texts: List[str]) -> List[List[float]]: + def _embed_texts(self, texts: list[str]) -> list[list[float]]: return [ i.embedding for i in self._openai_client.embeddings.create( @@ -235,10 +234,10 @@ class MongoDBVectorSearchTool(BaseTool): def _bulk_embed_and_insert_texts( self, - texts: List[str], - metadatas: List[dict], - ids: List[str], - ) -> List[str]: + texts: list[str], + metadatas: list[dict], + ids: list[str], + ) -> list[str]: """Bulk insert single batch of texts, embeddings, and ids.""" from bson import ObjectId from pymongo.operations import ReplaceOne @@ -254,7 +253,9 @@ class MongoDBVectorSearchTool(BaseTool): self.embedding_key: embedding, **m, } - for i, t, m, embedding in zip(ids, texts, metadatas, embeddings) + for i, t, m, embedding in zip( + ids, texts, metadatas, embeddings, strict=False + ) ] operations = [ReplaceOne({"_id": doc["_id"]}, doc, upsert=True) for doc in docs] # insert the documents in MongoDB Atlas diff --git a/packages/tools/src/crewai_tools/tools/multion_tool/multion_tool.py b/packages/tools/src/crewai_tools/tools/multion_tool/multion_tool.py index cf652c324..5324e9abe 100644 --- a/packages/tools/src/crewai_tools/tools/multion_tool/multion_tool.py +++ b/packages/tools/src/crewai_tools/tools/multion_tool/multion_tool.py @@ -1,7 +1,7 @@ """Multion tool spec.""" import os -from typing import Any, Optional, List +from typing import Any from crewai.tools import BaseTool, EnvVar @@ -13,18 +13,20 @@ class MultiOnTool(BaseTool): description: str = """Multion gives the ability for LLMs to control web browsers using natural language instructions. 
If the status is 'CONTINUE', reissue the same instruction to continue execution """ - multion: Optional[Any] = None - session_id: Optional[str] = None + multion: Any | None = None + session_id: str | None = None local: bool = False max_steps: int = 3 - package_dependencies: List[str] = ["multion"] - env_vars: List[EnvVar] = [ - EnvVar(name="MULTION_API_KEY", description="API key for Multion", required=True), + package_dependencies: list[str] = ["multion"] + env_vars: list[EnvVar] = [ + EnvVar( + name="MULTION_API_KEY", description="API key for Multion", required=True + ), ] def __init__( self, - api_key: Optional[str] = None, + api_key: str | None = None, local: bool = False, max_steps: int = 3, **kwargs, diff --git a/packages/tools/src/crewai_tools/tools/mysql_search_tool/mysql_search_tool.py b/packages/tools/src/crewai_tools/tools/mysql_search_tool/mysql_search_tool.py index 8c2c5ef5d..383e464b0 100644 --- a/packages/tools/src/crewai_tools/tools/mysql_search_tool/mysql_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/mysql_search_tool/mysql_search_tool.py @@ -1,11 +1,6 @@ -from typing import Any, Type - -try: - from embedchain.loaders.mysql import MySQLLoader - EMBEDCHAIN_AVAILABLE = True -except ImportError: - EMBEDCHAIN_AVAILABLE = False +from typing import Any +from crewai_tools.rag.data_types import DataType from pydantic import BaseModel, Field from ..rag.rag_tool import RagTool @@ -23,16 +18,12 @@ class MySQLSearchToolSchema(BaseModel): class MySQLSearchTool(RagTool): name: str = "Search a database's table content" description: str = "A tool that can be used to semantic search a query from a database table's content." - args_schema: Type[BaseModel] = MySQLSearchToolSchema + args_schema: type[BaseModel] = MySQLSearchToolSchema db_uri: str = Field(..., description="Mandatory database URI") def __init__(self, table_name: str, **kwargs): - if not EMBEDCHAIN_AVAILABLE: - raise ImportError("embedchain is not installed. Please install it with `pip install crewai-tools[embedchain]`") super().__init__(**kwargs) - kwargs["data_type"] = "mysql" - kwargs["loader"] = MySQLLoader(config=dict(url=self.db_uri)) - self.add(table_name) + self.add(table_name, data_type=DataType.MYSQL, metadata={"db_uri": self.db_uri}) self.description = f"A tool that can be used to semantic search a query the {table_name} database table's content." 
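Since construction now routes straight through the RAG layer rather than an embedchain loader, a minimal usage sketch of the reworked MySQLSearchTool follows; the URI, table name, and query values are illustrative only:

from crewai_tools.tools.mysql_search_tool.mysql_search_tool import MySQLSearchTool

# db_uri is a mandatory field; the table's content is indexed at construction time via add().
tool = MySQLSearchTool(
    db_uri="mysql://user:password@localhost:3306/appdb",
    table_name="orders",
)

# The new optional arguments are forwarded to RagTool._run.
answer = tool._run(
    search_query="orders flagged for manual review",
    similarity_threshold=0.6,
    limit=5,
)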
self._generate_description() @@ -46,6 +37,10 @@ class MySQLSearchTool(RagTool): def _run( self, search_query: str, + similarity_threshold: float | None = None, + limit: int | None = None, **kwargs: Any, ) -> Any: - return super()._run(query=search_query) + return super()._run( + query=search_query, similarity_threshold=similarity_threshold, limit=limit + ) diff --git a/packages/tools/src/crewai_tools/tools/nl2sql/nl2sql_tool.py b/packages/tools/src/crewai_tools/tools/nl2sql/nl2sql_tool.py index f3d892d1a..2a4d07a04 100644 --- a/packages/tools/src/crewai_tools/tools/nl2sql/nl2sql_tool.py +++ b/packages/tools/src/crewai_tools/tools/nl2sql/nl2sql_tool.py @@ -1,4 +1,4 @@ -from typing import Any, Type, Union +from typing import Any from crewai.tools import BaseTool from pydantic import BaseModel, Field @@ -6,6 +6,7 @@ from pydantic import BaseModel, Field try: from sqlalchemy import create_engine, text from sqlalchemy.orm import sessionmaker + SQLALCHEMY_AVAILABLE = True except ImportError: SQLALCHEMY_AVAILABLE = False @@ -27,18 +28,20 @@ class NL2SQLTool(BaseTool): ) tables: list = [] columns: dict = {} - args_schema: Type[BaseModel] = NL2SQLToolInput + args_schema: type[BaseModel] = NL2SQLToolInput def model_post_init(self, __context: Any) -> None: if not SQLALCHEMY_AVAILABLE: - raise ImportError("sqlalchemy is not installed. Please install it with `pip install crewai-tools[sqlalchemy]`") + raise ImportError( + "sqlalchemy is not installed. Please install it with `pip install crewai-tools[sqlalchemy]`" + ) data = {} tables = self._fetch_available_tables() for table in tables: table_columns = self._fetch_all_available_columns(table["table_name"]) - data[f'{table["table_name"]}_columns'] = table_columns + data[f"{table['table_name']}_columns"] = table_columns self.tables = tables self.columns = data @@ -65,9 +68,11 @@ class NL2SQLTool(BaseTool): return data - def execute_sql(self, sql_query: str) -> Union[list, str]: + def execute_sql(self, sql_query: str) -> list | str: if not SQLALCHEMY_AVAILABLE: - raise ImportError("sqlalchemy is not installed. Please install it with `pip install crewai-tools[sqlalchemy]`") + raise ImportError( + "sqlalchemy is not installed. Please install it with `pip install crewai-tools[sqlalchemy]`" + ) engine = create_engine(self.db_uri) Session = sessionmaker(bind=engine) @@ -78,10 +83,10 @@ class NL2SQLTool(BaseTool): if result.returns_rows: columns = result.keys() - data = [dict(zip(columns, row)) for row in result.fetchall()] - return data - else: - return f"Query {sql_query} executed successfully" + return [ + dict(zip(columns, row, strict=False)) for row in result.fetchall() + ] + return f"Query {sql_query} executed successfully" except Exception as e: session.rollback() diff --git a/packages/tools/src/crewai_tools/tools/ocr_tool/ocr_tool.py b/packages/tools/src/crewai_tools/tools/ocr_tool/ocr_tool.py index aabe0ffbd..646ec5670 100644 --- a/packages/tools/src/crewai_tools/tools/ocr_tool/ocr_tool.py +++ b/packages/tools/src/crewai_tools/tools/ocr_tool/ocr_tool.py @@ -5,18 +5,15 @@ This tool provides functionality for extracting text from images using supported """ import base64 -from typing import Optional, Type -from openai import OpenAI -from pydantic import BaseModel, PrivateAttr - -from crewai.tools.base_tool import BaseTool from crewai import LLM +from crewai.tools.base_tool import BaseTool +from pydantic import BaseModel, PrivateAttr class OCRToolSchema(BaseModel): """Input schema for Optical Character Recognition Tool. 
- + Attributes: image_path_url (str): Path to a local image file or URL of an image. For local files, provide the absolute or relative path. @@ -42,12 +39,10 @@ class OCRTool(BaseTool): """ name: str = "Optical Character Recognition Tool" - description: str = ( - "This tool uses an LLM's API to extract text from an image file." - ) - _llm: Optional[LLM] = PrivateAttr(default=None) + description: str = "This tool uses an LLM's API to extract text from an image file." + _llm: LLM | None = PrivateAttr(default=None) - args_schema: Type[BaseModel] = OCRToolSchema + args_schema: type[BaseModel] = OCRToolSchema def __init__(self, llm: LLM = None, **kwargs): """Initialize the OCR tool. @@ -93,11 +88,11 @@ class OCRTool(BaseTool): else: base64_image = self._encode_image(image_path_url) image_data = f"data:image/jpeg;base64,{base64_image}" - - messages=[ + + messages = [ { "role": "system", - "content": "You are an expert OCR specialist. Extract complete text from the provided image. Provide the result as a raw text." + "content": "You are an expert OCR specialist. Extract complete text from the provided image. Provide the result as a raw text.", }, { "role": "user", @@ -107,11 +102,10 @@ class OCRTool(BaseTool): "image_url": {"url": image_data}, } ], - } + }, ] - response = self._llm.call(messages=messages) - return response + return self._llm.call(messages=messages) def _encode_image(self, image_path: str): """Encode an image file to base64 format. diff --git a/packages/tools/src/crewai_tools/tools/oxylabs_amazon_product_scraper_tool/oxylabs_amazon_product_scraper_tool.py b/packages/tools/src/crewai_tools/tools/oxylabs_amazon_product_scraper_tool/oxylabs_amazon_product_scraper_tool.py index 1d4146fcb..6a43a8846 100644 --- a/packages/tools/src/crewai_tools/tools/oxylabs_amazon_product_scraper_tool/oxylabs_amazon_product_scraper_tool.py +++ b/packages/tools/src/crewai_tools/tools/oxylabs_amazon_product_scraper_tool/oxylabs_amazon_product_scraper_tool.py @@ -2,7 +2,7 @@ import json import os from importlib.metadata import version from platform import architecture, python_version -from typing import Any, List, Type +from typing import Any from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, ConfigDict, Field @@ -19,7 +19,7 @@ except ImportError: OXYLABS_AVAILABLE = False -__all__ = ["OxylabsAmazonProductScraperTool", "OxylabsAmazonProductScraperConfig"] +__all__ = ["OxylabsAmazonProductScraperConfig", "OxylabsAmazonProductScraperTool"] class OxylabsAmazonProductScraperArgs(BaseModel): @@ -68,14 +68,18 @@ class OxylabsAmazonProductScraperTool(BaseTool): ) name: str = "Oxylabs Amazon Product Scraper tool" description: str = "Scrape Amazon product pages with Oxylabs Amazon Product Scraper" - args_schema: Type[BaseModel] = OxylabsAmazonProductScraperArgs + args_schema: type[BaseModel] = OxylabsAmazonProductScraperArgs oxylabs_api: RealtimeClient config: OxylabsAmazonProductScraperConfig - package_dependencies: List[str] = ["oxylabs"] - env_vars: List[EnvVar] = [ - EnvVar(name="OXYLABS_USERNAME", description="Username for Oxylabs", required=True), - EnvVar(name="OXYLABS_PASSWORD", description="Password for Oxylabs", required=True), + package_dependencies: list[str] = ["oxylabs"] + env_vars: list[EnvVar] = [ + EnvVar( + name="OXYLABS_USERNAME", description="Username for Oxylabs", required=True + ), + EnvVar( + name="OXYLABS_PASSWORD", description="Password for Oxylabs", required=True + ), ] def __init__( diff --git 
a/packages/tools/src/crewai_tools/tools/oxylabs_amazon_search_scraper_tool/oxylabs_amazon_search_scraper_tool.py b/packages/tools/src/crewai_tools/tools/oxylabs_amazon_search_scraper_tool/oxylabs_amazon_search_scraper_tool.py index e659d244f..8eb186160 100644 --- a/packages/tools/src/crewai_tools/tools/oxylabs_amazon_search_scraper_tool/oxylabs_amazon_search_scraper_tool.py +++ b/packages/tools/src/crewai_tools/tools/oxylabs_amazon_search_scraper_tool/oxylabs_amazon_search_scraper_tool.py @@ -2,7 +2,7 @@ import json import os from importlib.metadata import version from platform import architecture, python_version -from typing import Any, List, Type +from typing import Any from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, ConfigDict, Field @@ -19,7 +19,7 @@ except ImportError: OXYLABS_AVAILABLE = False -__all__ = ["OxylabsAmazonSearchScraperTool", "OxylabsAmazonSearchScraperConfig"] +__all__ = ["OxylabsAmazonSearchScraperConfig", "OxylabsAmazonSearchScraperTool"] class OxylabsAmazonSearchScraperArgs(BaseModel): @@ -70,14 +70,18 @@ class OxylabsAmazonSearchScraperTool(BaseTool): ) name: str = "Oxylabs Amazon Search Scraper tool" description: str = "Scrape Amazon search results with Oxylabs Amazon Search Scraper" - args_schema: Type[BaseModel] = OxylabsAmazonSearchScraperArgs + args_schema: type[BaseModel] = OxylabsAmazonSearchScraperArgs oxylabs_api: RealtimeClient config: OxylabsAmazonSearchScraperConfig - package_dependencies: List[str] = ["oxylabs"] - env_vars: List[EnvVar] = [ - EnvVar(name="OXYLABS_USERNAME", description="Username for Oxylabs", required=True), - EnvVar(name="OXYLABS_PASSWORD", description="Password for Oxylabs", required=True), + package_dependencies: list[str] = ["oxylabs"] + env_vars: list[EnvVar] = [ + EnvVar( + name="OXYLABS_USERNAME", description="Username for Oxylabs", required=True + ), + EnvVar( + name="OXYLABS_PASSWORD", description="Password for Oxylabs", required=True + ), ] def __init__( diff --git a/packages/tools/src/crewai_tools/tools/oxylabs_google_search_scraper_tool/oxylabs_google_search_scraper_tool.py b/packages/tools/src/crewai_tools/tools/oxylabs_google_search_scraper_tool/oxylabs_google_search_scraper_tool.py index 1096df098..a8b7ca1f6 100644 --- a/packages/tools/src/crewai_tools/tools/oxylabs_google_search_scraper_tool/oxylabs_google_search_scraper_tool.py +++ b/packages/tools/src/crewai_tools/tools/oxylabs_google_search_scraper_tool/oxylabs_google_search_scraper_tool.py @@ -2,7 +2,7 @@ import json import os from importlib.metadata import version from platform import architecture, python_version -from typing import Any, List, Type +from typing import Any from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, ConfigDict, Field @@ -19,7 +19,7 @@ except ImportError: OXYLABS_AVAILABLE = False -__all__ = ["OxylabsGoogleSearchScraperTool", "OxylabsGoogleSearchScraperConfig"] +__all__ = ["OxylabsGoogleSearchScraperConfig", "OxylabsGoogleSearchScraperTool"] class OxylabsGoogleSearchScraperArgs(BaseModel): @@ -73,14 +73,18 @@ class OxylabsGoogleSearchScraperTool(BaseTool): ) name: str = "Oxylabs Google Search Scraper tool" description: str = "Scrape Google Search results with Oxylabs Google Search Scraper" - args_schema: Type[BaseModel] = OxylabsGoogleSearchScraperArgs + args_schema: type[BaseModel] = OxylabsGoogleSearchScraperArgs oxylabs_api: RealtimeClient config: OxylabsGoogleSearchScraperConfig - package_dependencies: List[str] = ["oxylabs"] - env_vars: List[EnvVar] = [ - EnvVar(name="OXYLABS_USERNAME", 
description="Username for Oxylabs", required=True), - EnvVar(name="OXYLABS_PASSWORD", description="Password for Oxylabs", required=True), + package_dependencies: list[str] = ["oxylabs"] + env_vars: list[EnvVar] = [ + EnvVar( + name="OXYLABS_USERNAME", description="Username for Oxylabs", required=True + ), + EnvVar( + name="OXYLABS_PASSWORD", description="Password for Oxylabs", required=True + ), ] def __init__( diff --git a/packages/tools/src/crewai_tools/tools/oxylabs_universal_scraper_tool/oxylabs_universal_scraper_tool.py b/packages/tools/src/crewai_tools/tools/oxylabs_universal_scraper_tool/oxylabs_universal_scraper_tool.py index 05b174500..993277cc7 100644 --- a/packages/tools/src/crewai_tools/tools/oxylabs_universal_scraper_tool/oxylabs_universal_scraper_tool.py +++ b/packages/tools/src/crewai_tools/tools/oxylabs_universal_scraper_tool/oxylabs_universal_scraper_tool.py @@ -2,7 +2,7 @@ import json import os from importlib.metadata import version from platform import architecture, python_version -from typing import Any, List, Type +from typing import Any from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, ConfigDict, Field @@ -18,7 +18,7 @@ except ImportError: OXYLABS_AVAILABLE = False -__all__ = ["OxylabsUniversalScraperTool", "OxylabsUniversalScraperConfig"] +__all__ = ["OxylabsUniversalScraperConfig", "OxylabsUniversalScraperTool"] class OxylabsUniversalScraperArgs(BaseModel): @@ -64,14 +64,18 @@ class OxylabsUniversalScraperTool(BaseTool): ) name: str = "Oxylabs Universal Scraper tool" description: str = "Scrape any url with Oxylabs Universal Scraper" - args_schema: Type[BaseModel] = OxylabsUniversalScraperArgs + args_schema: type[BaseModel] = OxylabsUniversalScraperArgs oxylabs_api: RealtimeClient config: OxylabsUniversalScraperConfig - package_dependencies: List[str] = ["oxylabs"] - env_vars: List[EnvVar] = [ - EnvVar(name="OXYLABS_USERNAME", description="Username for Oxylabs", required=True), - EnvVar(name="OXYLABS_PASSWORD", description="Password for Oxylabs", required=True), + package_dependencies: list[str] = ["oxylabs"] + env_vars: list[EnvVar] = [ + EnvVar( + name="OXYLABS_USERNAME", description="Username for Oxylabs", required=True + ), + EnvVar( + name="OXYLABS_PASSWORD", description="Password for Oxylabs", required=True + ), ] def __init__( diff --git a/packages/tools/src/crewai_tools/tools/parallel_tools/parallel_search_tool.py b/packages/tools/src/crewai_tools/tools/parallel_tools/parallel_search_tool.py index d695bac9d..e9f26d9a1 100644 --- a/packages/tools/src/crewai_tools/tools/parallel_tools/parallel_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/parallel_tools/parallel_search_tool.py @@ -1,5 +1,5 @@ import os -from typing import Any, Dict, List, Optional, Type, Annotated +from typing import Annotated, Any import requests from crewai.tools import BaseTool, EnvVar @@ -12,12 +12,12 @@ class ParallelSearchInput(BaseModel): At least one of objective or search_queries is required. 
""" - objective: Optional[str] = Field( + objective: str | None = Field( None, description="Natural-language goal for the web research (<=5000 chars)", max_length=5000, ) - search_queries: Optional[List[Annotated[str, Field(max_length=200)]]] = Field( + search_queries: list[Annotated[str, Field(max_length=200)]] | None = Field( default=None, description="Optional list of keyword queries (<=5 items, each <=200 chars)", min_length=1, @@ -39,7 +39,7 @@ class ParallelSearchInput(BaseModel): ge=100, description="Maximum characters per result excerpt (values >30000 not guaranteed)", ) - source_policy: Optional[Dict[str, Any]] = Field( + source_policy: dict[str, Any] | None = Field( default=None, description="Optional source policy configuration" ) @@ -50,27 +50,27 @@ class ParallelSearchTool(BaseTool): "Search the web using Parallel's Search API (v1beta). Returns ranked results with " "compressed excerpts optimized for LLMs." ) - args_schema: Type[BaseModel] = ParallelSearchInput + args_schema: type[BaseModel] = ParallelSearchInput - env_vars: List[EnvVar] = [ + env_vars: list[EnvVar] = [ EnvVar( name="PARALLEL_API_KEY", description="API key for Parallel", required=True, ), ] - package_dependencies: List[str] = ["requests"] + package_dependencies: list[str] = ["requests"] search_url: str = "https://api.parallel.ai/v1beta/search" def _run( self, - objective: Optional[str] = None, - search_queries: Optional[List[str]] = None, + objective: str | None = None, + search_queries: list[str] | None = None, processor: str = "base", max_results: int = 10, max_chars_per_result: int = 6000, - source_policy: Optional[Dict[str, Any]] = None, + source_policy: dict[str, Any] | None = None, **_: Any, ) -> str: api_key = os.environ.get("PARALLEL_API_KEY") @@ -86,7 +86,7 @@ class ParallelSearchTool(BaseTool): } try: - payload: Dict[str, Any] = { + payload: dict[str, Any] = { "processor": processor, "max_results": max_results, "max_chars_per_result": max_chars_per_result, @@ -99,17 +99,21 @@ class ParallelSearchTool(BaseTool): payload["source_policy"] = source_policy request_timeout = 90 if processor == "pro" else 30 - resp = requests.post(self.search_url, json=payload, headers=headers, timeout=request_timeout) + resp = requests.post( + self.search_url, json=payload, headers=headers, timeout=request_timeout + ) if resp.status_code >= 300: - return f"Parallel Search API error: {resp.status_code} {resp.text[:200]}" + return ( + f"Parallel Search API error: {resp.status_code} {resp.text[:200]}" + ) data = resp.json() return self._format_output(data) except requests.Timeout: return "Parallel Search API timeout. Please try again later." 
- except Exception as exc: # noqa: BLE001 + except Exception as exc: return f"Unexpected error calling Parallel Search API: {exc}" - def _format_output(self, result: Dict[str, Any]) -> str: + def _format_output(self, result: dict[str, Any]) -> str: # Return the full JSON payload (search_id + results) as a compact JSON string try: import json diff --git a/packages/tools/src/crewai_tools/tools/patronus_eval_tool/patronus_eval_tool.py b/packages/tools/src/crewai_tools/tools/patronus_eval_tool/patronus_eval_tool.py index bc9a60aae..6bf39ab8a 100644 --- a/packages/tools/src/crewai_tools/tools/patronus_eval_tool/patronus_eval_tool.py +++ b/packages/tools/src/crewai_tools/tools/patronus_eval_tool/patronus_eval_tool.py @@ -1,7 +1,7 @@ import json import os import warnings -from typing import Any, Dict, List, Optional +from typing import Any import requests from crewai.tools import BaseTool, EnvVar @@ -10,11 +10,15 @@ from crewai.tools import BaseTool, EnvVar class PatronusEvalTool(BaseTool): name: str = "Patronus Evaluation Tool" evaluate_url: str = "https://api.patronus.ai/v1/evaluate" - evaluators: List[Dict[str, str]] = [] - criteria: List[Dict[str, str]] = [] + evaluators: list[dict[str, str]] = [] + criteria: list[dict[str, str]] = [] description: str = "" - env_vars: List[EnvVar] = [ - EnvVar(name="PATRONUS_API_KEY", description="API key for Patronus evaluation services", required=True), + env_vars: list[EnvVar] = [ + EnvVar( + name="PATRONUS_API_KEY", + description="API key for Patronus evaluation services", + required=True, + ), ] def __init__(self, **kwargs: Any): @@ -24,7 +28,8 @@ class PatronusEvalTool(BaseTool): self.criteria = temp_criteria self.description = self._generate_description() warnings.warn( - "You are allowing the agent to select the best evaluator and criteria when you use the `PatronusEvalTool`. If this is not intended then please use `PatronusPredefinedCriteriaEvalTool` instead." + "You are allowing the agent to select the best evaluator and criteria when you use the `PatronusEvalTool`. If this is not intended then please use `PatronusPredefinedCriteriaEvalTool` instead.", + stacklevel=2, ) def _init_run(self): @@ -96,19 +101,19 @@ class PatronusEvalTool(BaseTool): 1. evaluated_model_input: str: The agent's task description in simple text 2. evaluated_model_output: str: The agent's output of the task 3. evaluated_model_retrieved_context: str: The agent's context - 4. evaluators: This is a list of dictionaries containing one of the following evaluators and the corresponding criteria. An example input for this field: [{{"evaluator": "Judge", "criteria": "patronus:is-code"}}] + 4. evaluators: This is a list of dictionaries containing one of the following evaluators and the corresponding criteria. 
An example input for this field: [{{"evaluator": "Judge", "criteria": "patronus:is-code"}}] - Evaluators: + Evaluators: {criteria} You must ONLY choose the most appropriate evaluator and criteria based on the "pass_criteria" or "description" fields for your evaluation task and nothing from outside of the options present.""" def _run( self, - evaluated_model_input: Optional[str], - evaluated_model_output: Optional[str], - evaluated_model_retrieved_context: Optional[str], - evaluators: List[Dict[str, str]], + evaluated_model_input: str | None, + evaluated_model_output: str | None, + evaluated_model_retrieved_context: str | None, + evaluators: list[dict[str, str]], ) -> Any: # Assert correct format of evaluators evals = [] diff --git a/packages/tools/src/crewai_tools/tools/patronus_eval_tool/patronus_local_evaluator_tool.py b/packages/tools/src/crewai_tools/tools/patronus_eval_tool/patronus_local_evaluator_tool.py index 30b78a3c4..763f1ee4c 100644 --- a/packages/tools/src/crewai_tools/tools/patronus_eval_tool/patronus_local_evaluator_tool.py +++ b/packages/tools/src/crewai_tools/tools/patronus_eval_tool/patronus_local_evaluator_tool.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Any, Type, List +from typing import TYPE_CHECKING, Any from crewai.tools import BaseTool from pydantic import BaseModel, ConfigDict, Field @@ -32,16 +32,14 @@ class FixedLocalEvaluatorToolSchema(BaseModel): class PatronusLocalEvaluatorTool(BaseTool): name: str = "Patronus Local Evaluator Tool" - description: str = ( - "This tool is used to evaluate the model input and output using custom function evaluators." - ) - args_schema: Type[BaseModel] = FixedLocalEvaluatorToolSchema + description: str = "This tool is used to evaluate the model input and output using custom function evaluators." 
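A sketch of the evaluators payload the revised PatronusEvalTool._run signature expects; the evaluator/criteria pair is the one quoted in the tool's own prompt text, everything else is a hypothetical stand-in (constructing the tool requires PATRONUS_API_KEY):

from crewai_tools import PatronusEvalTool

tool = PatronusEvalTool()  # fetches the evaluator catalog at init and warns about agent-selected criteria
result = tool.run(
    evaluated_model_input="Write a Python function that reverses a string",
    evaluated_model_output="def reverse(s): return s[::-1]",
    evaluated_model_retrieved_context="",
    evaluators=[{"evaluator": "Judge", "criteria": "patronus:is-code"}],
)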
+ args_schema: type[BaseModel] = FixedLocalEvaluatorToolSchema client: "Client" = None evaluator: str evaluated_model_gold_answer: str model_config = ConfigDict(arbitrary_types_allowed=True) - package_dependencies: List[str] = ["patronus"] + package_dependencies: list[str] = ["patronus"] def __init__( self, @@ -99,7 +97,7 @@ class PatronusLocalEvaluatorTool(BaseTool): evaluated_model_gold_answer = self.evaluated_model_gold_answer evaluator = self.evaluator - result: "EvaluationResult" = self.client.evaluate( + result: EvaluationResult = self.client.evaluate( evaluator=evaluator, evaluated_model_input=evaluated_model_input, evaluated_model_output=evaluated_model_output, @@ -107,8 +105,7 @@ class PatronusLocalEvaluatorTool(BaseTool): evaluated_model_gold_answer=evaluated_model_gold_answer, tags={}, # Optional metadata, supports arbitrary key-value pairs ) - output = f"Evaluation result: {result.pass_}, Explanation: {result.explanation}" - return output + return f"Evaluation result: {result.pass_}, Explanation: {result.explanation}" try: diff --git a/packages/tools/src/crewai_tools/tools/patronus_eval_tool/patronus_predefined_criteria_eval_tool.py b/packages/tools/src/crewai_tools/tools/patronus_eval_tool/patronus_predefined_criteria_eval_tool.py index cf906586d..377dcb1a8 100644 --- a/packages/tools/src/crewai_tools/tools/patronus_eval_tool/patronus_predefined_criteria_eval_tool.py +++ b/packages/tools/src/crewai_tools/tools/patronus_eval_tool/patronus_predefined_criteria_eval_tool.py @@ -1,6 +1,6 @@ import json import os -from typing import Any, Dict, List, Type +from typing import Any import requests from crewai.tools import BaseTool @@ -8,19 +8,19 @@ from pydantic import BaseModel, Field class FixedBaseToolSchema(BaseModel): - evaluated_model_input: Dict = Field( + evaluated_model_input: dict = Field( ..., description="The agent's task description in simple text" ) - evaluated_model_output: Dict = Field( + evaluated_model_output: dict = Field( ..., description="The agent's output of the task" ) - evaluated_model_retrieved_context: Dict = Field( + evaluated_model_retrieved_context: dict = Field( ..., description="The agent's context" ) - evaluated_model_gold_answer: Dict = Field( + evaluated_model_gold_answer: dict = Field( ..., description="The agent's gold answer only if available" ) - evaluators: List[Dict[str, str]] = Field( + evaluators: list[dict[str, str]] = Field( ..., description="List of dictionaries containing the evaluator and criteria to evaluate the model input and output. 
An example input for this field: [{'evaluator': '[evaluator-from-user]', 'criteria': '[criteria-from-user]'}]", ) @@ -36,10 +36,10 @@ class PatronusPredefinedCriteriaEvalTool(BaseTool): name: str = "Call Patronus API tool for evaluation of model inputs and outputs" description: str = """This tool calls the Patronus Evaluation API that takes the following arguments:""" evaluate_url: str = "https://api.patronus.ai/v1/evaluate" - args_schema: Type[BaseModel] = FixedBaseToolSchema - evaluators: List[Dict[str, str]] = [] + args_schema: type[BaseModel] = FixedBaseToolSchema + evaluators: list[dict[str, str]] = [] - def __init__(self, evaluators: List[Dict[str, str]], **kwargs: Any): + def __init__(self, evaluators: list[dict[str, str]], **kwargs: Any): super().__init__(**kwargs) if evaluators: self.evaluators = evaluators diff --git a/packages/tools/src/crewai_tools/tools/pdf_search_tool/pdf_search_tool.py b/packages/tools/src/crewai_tools/tools/pdf_search_tool/pdf_search_tool.py index 96f141c17..4ecbe002f 100644 --- a/packages/tools/src/crewai_tools/tools/pdf_search_tool/pdf_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/pdf_search_tool/pdf_search_tool.py @@ -1,13 +1,6 @@ -from typing import Optional, Type - +from crewai_tools.rag.data_types import DataType from pydantic import BaseModel, Field -try: - from embedchain.models.data_type import DataType - EMBEDCHAIN_AVAILABLE = True -except ImportError: - EMBEDCHAIN_AVAILABLE = False - from ..rag.rag_tool import RagTool @@ -30,9 +23,9 @@ class PDFSearchTool(RagTool): description: str = ( "A tool that can be used to semantic search a query from a PDF's content." ) - args_schema: Type[BaseModel] = PDFSearchToolSchema + args_schema: type[BaseModel] = PDFSearchToolSchema - def __init__(self, pdf: Optional[str] = None, **kwargs): + def __init__(self, pdf: str | None = None, **kwargs): super().__init__(**kwargs) if pdf is not None: self.add(pdf) @@ -41,15 +34,17 @@ class PDFSearchTool(RagTool): self._generate_description() def add(self, pdf: str) -> None: - if not EMBEDCHAIN_AVAILABLE: - raise ImportError("embedchain is not installed. 
Please install it with `pip install crewai-tools[embedchain]`") super().add(pdf, data_type=DataType.PDF_FILE) def _run( self, query: str, - pdf: Optional[str] = None, + pdf: str | None = None, + similarity_threshold: float | None = None, + limit: int | None = None, ) -> str: if pdf is not None: self.add(pdf) - return super()._run(query=query) + return super()._run( + query=query, similarity_threshold=similarity_threshold, limit=limit + ) diff --git a/packages/tools/src/crewai_tools/tools/pdf_text_writing_tool/pdf_text_writing_tool.py b/packages/tools/src/crewai_tools/tools/pdf_text_writing_tool/pdf_text_writing_tool.py index 1d8f3ffd8..c44bef70b 100644 --- a/packages/tools/src/crewai_tools/tools/pdf_text_writing_tool/pdf_text_writing_tool.py +++ b/packages/tools/src/crewai_tools/tools/pdf_text_writing_tool/pdf_text_writing_tool.py @@ -1,10 +1,8 @@ from pathlib import Path -from typing import Optional, Type - -from pydantic import BaseModel, Field -from pypdf import ContentStream, Font, NameObject, PageObject, PdfReader, PdfWriter from crewai_tools.tools.rag.rag_tool import RagTool +from pydantic import BaseModel, Field +from pypdf import ContentStream, Font, NameObject, PageObject, PdfReader, PdfWriter class PDFTextWritingToolSchema(BaseModel): @@ -19,10 +17,10 @@ class PDFTextWritingToolSchema(BaseModel): font_color: str = Field( default="0 0 0 rg", description="RGB color code for the text" ) - font_name: Optional[str] = Field( + font_name: str | None = Field( default="F1", description="Font name for standard fonts" ) - font_file: Optional[str] = Field( + font_file: str | None = Field( None, description="Path to a .ttf font file for custom font usage" ) page_number: int = Field(default=0, description="Page number to add text to") @@ -32,10 +30,8 @@ class PDFTextWritingTool(RagTool): """A tool to add text to specific positions in a PDF, with custom font support.""" name: str = "PDF Text Writing Tool" - description: str = ( - "A tool that can write text to a specific position in a PDF document, with optional custom font embedding." - ) - args_schema: Type[BaseModel] = PDFTextWritingToolSchema + description: str = "A tool that can write text to a specific position in a PDF document, with optional custom font embedding." 
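A minimal sketch of the per-call overrides now threaded through PDFSearchTool._run; the file path is a hypothetical placeholder, and the fallback defaults (0.6 / 5) come from the RagTool changes further down in this patch:

from crewai_tools import PDFSearchTool

tool = PDFSearchTool(pdf="docs/handbook.pdf")  # placeholder path, indexed via add() at init
answer = tool.run(
    query="What is the refund policy?",
    similarity_threshold=0.7,  # overrides the tool-level default for this call
    limit=3,
)
print(answer)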
+ args_schema: type[BaseModel] = PDFTextWritingToolSchema def run( self, @@ -45,7 +41,7 @@ class PDFTextWritingTool(RagTool): font_size: int, font_color: str, font_name: str = "F1", - font_file: Optional[str] = None, + font_file: str | None = None, page_number: int = 0, ) -> str: reader = PdfReader(pdf_path) @@ -86,5 +82,4 @@ class PDFTextWritingTool(RagTool): """Embeds a TTF font into the PDF and returns the font name.""" with open(font_file, "rb") as file: font = Font.true_type(file.read()) - font_ref = writer.add_object(font) - return font_ref + return writer.add_object(font) diff --git a/packages/tools/src/crewai_tools/tools/pg_search_tool/pg_search_tool.py b/packages/tools/src/crewai_tools/tools/pg_search_tool/pg_search_tool.py index 30e294944..b2093c9fb 100644 --- a/packages/tools/src/crewai_tools/tools/pg_search_tool/pg_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/pg_search_tool/pg_search_tool.py @@ -1,11 +1,6 @@ -from typing import Any, Type - -try: - from embedchain.loaders.postgres import PostgresLoader - EMBEDCHAIN_AVAILABLE = True -except ImportError: - EMBEDCHAIN_AVAILABLE = False +from typing import Any +from crewai_tools.rag.data_types import DataType from pydantic import BaseModel, Field from ..rag.rag_tool import RagTool @@ -23,16 +18,14 @@ class PGSearchToolSchema(BaseModel): class PGSearchTool(RagTool): name: str = "Search a database's table content" description: str = "A tool that can be used to semantic search a query from a database table's content." - args_schema: Type[BaseModel] = PGSearchToolSchema + args_schema: type[BaseModel] = PGSearchToolSchema db_uri: str = Field(..., description="Mandatory database URI") def __init__(self, table_name: str, **kwargs): - if not EMBEDCHAIN_AVAILABLE: - raise ImportError("embedchain is not installed. Please install it with `pip install crewai-tools[embedchain]`") super().__init__(**kwargs) - kwargs["data_type"] = "postgres" - kwargs["loader"] = PostgresLoader(config=dict(url=self.db_uri)) - self.add(table_name) + self.add( + table_name, data_type=DataType.POSTGRES, metadata={"db_uri": self.db_uri} + ) self.description = f"A tool that can be used to semantic search a query the {table_name} database table's content." 
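A sketch of constructing the reworked PGSearchTool, which now routes the table through the native RAG layer via DataType.POSTGRES instead of embedchain's PostgresLoader; the DSN and table name are hypothetical:

from crewai_tools import PGSearchTool

tool = PGSearchTool(
    db_uri="postgresql://user:pass@localhost:5432/mydb",  # placeholder DSN
    table_name="customers",
)
print(tool.run(search_query="customers who churned last quarter"))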
self._generate_description() @@ -46,6 +39,13 @@ class PGSearchTool(RagTool): def _run( self, search_query: str, + similarity_threshold: float | None = None, + limit: int | None = None, **kwargs: Any, ) -> Any: - return super()._run(query=search_query, **kwargs) + return super()._run( + query=search_query, + similarity_threshold=similarity_threshold, + limit=limit, + **kwargs, + ) diff --git a/packages/tools/src/crewai_tools/tools/qdrant_vector_search_tool/qdrant_search_tool.py b/packages/tools/src/crewai_tools/tools/qdrant_vector_search_tool/qdrant_search_tool.py index 73e373ae8..9237bf5d4 100644 --- a/packages/tools/src/crewai_tools/tools/qdrant_vector_search_tool/qdrant_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/qdrant_vector_search_tool/qdrant_search_tool.py @@ -1,11 +1,11 @@ import json import os -from typing import Any, Callable, Optional, Type, List - +from collections.abc import Callable +from typing import Any try: from qdrant_client import QdrantClient - from qdrant_client.http.models import Filter, FieldCondition, MatchValue + from qdrant_client.http.models import FieldCondition, Filter, MatchValue QDRANT_AVAILABLE = True except ImportError: @@ -26,11 +26,11 @@ class QdrantToolSchema(BaseModel): ..., description="The query to search retrieve relevant information from the Qdrant database. Pass only the query, not the question.", ) - filter_by: Optional[str] = Field( + filter_by: str | None = Field( default=None, description="Filter by properties. Pass only the properties, not the question.", ) - filter_value: Optional[str] = Field( + filter_value: str | None = Field( default=None, description="Filter by value. Pass only the value, not the question.", ) @@ -55,26 +55,26 @@ class QdrantVectorSearchTool(BaseTool): client: QdrantClient = None name: str = "QdrantVectorSearchTool" description: str = "A tool to search the Qdrant database for relevant information on internal documents." - args_schema: Type[BaseModel] = QdrantToolSchema - query: Optional[str] = None - filter_by: Optional[str] = None - filter_value: Optional[str] = None - collection_name: Optional[str] = None - limit: Optional[int] = Field(default=3) + args_schema: type[BaseModel] = QdrantToolSchema + query: str | None = None + filter_by: str | None = None + filter_value: str | None = None + collection_name: str | None = None + limit: int | None = Field(default=3) score_threshold: float = Field(default=0.35) qdrant_url: str = Field( ..., description="The URL of the Qdrant server", ) - qdrant_api_key: Optional[str] = Field( + qdrant_api_key: str | None = Field( default=None, description="The API key for the Qdrant server", ) - custom_embedding_fn: Optional[Callable] = Field( + custom_embedding_fn: Callable | None = Field( default=None, description="A custom embedding function to use for vectorization. If not provided, the default model will be used.", ) - package_dependencies: List[str] = ["qdrant-client"] + package_dependencies: list[str] = ["qdrant-client"] def __init__(self, **kwargs): super().__init__(**kwargs) @@ -102,8 +102,8 @@ class QdrantVectorSearchTool(BaseTool): def _run( self, query: str, - filter_by: Optional[str] = None, - filter_value: Optional[str] = None, + filter_by: str | None = None, + filter_value: str | None = None, ) -> str: """Execute vector similarity search on Qdrant. 
@@ -172,7 +172,7 @@ class QdrantVectorSearchTool(BaseTool): import openai client = openai.Client(api_key=os.getenv("OPENAI_API_KEY")) - embedding = ( + return ( client.embeddings.create( input=[query], model=embedding_model, @@ -180,4 +180,3 @@ class QdrantVectorSearchTool(BaseTool): .data[0] .embedding ) - return embedding diff --git a/packages/tools/src/crewai_tools/tools/rag/rag_tool.py b/packages/tools/src/crewai_tools/tools/rag/rag_tool.py index 1a9fad8b8..b110cb9ee 100644 --- a/packages/tools/src/crewai_tools/tools/rag/rag_tool.py +++ b/packages/tools/src/crewai_tools/tools/rag/rag_tool.py @@ -1,17 +1,22 @@ -import portalocker - +import os from abc import ABC, abstractmethod -from typing import Any -from pydantic import BaseModel, ConfigDict, Field, model_validator +from typing import Any, cast +from crewai.rag.embeddings.factory import get_embedding_function from crewai.tools import BaseTool +from pydantic import BaseModel, ConfigDict, Field, model_validator class Adapter(BaseModel, ABC): model_config = ConfigDict(arbitrary_types_allowed=True) @abstractmethod - def query(self, question: str) -> str: + def query( + self, + question: str, + similarity_threshold: float | None = None, + limit: int | None = None, + ) -> str: """Query the knowledge base with a question and return the answer.""" @abstractmethod @@ -25,7 +30,12 @@ class Adapter(BaseModel, ABC): class RagTool(BaseTool): class _AdapterPlaceholder(Adapter): - def query(self, question: str) -> str: + def query( + self, + question: str, + similarity_threshold: float | None = None, + limit: int | None = None, + ) -> str: raise NotImplementedError def add(self, *args: Any, **kwargs: Any) -> None: @@ -34,28 +44,147 @@ class RagTool(BaseTool): name: str = "Knowledge base" description: str = "A knowledge base that can be used to answer questions." summarize: bool = False + similarity_threshold: float = 0.6 + limit: int = 5 adapter: Adapter = Field(default_factory=_AdapterPlaceholder) - config: dict[str, Any] | None = None + config: Any | None = None @model_validator(mode="after") def _set_default_adapter(self): if isinstance(self.adapter, RagTool._AdapterPlaceholder): - try: - from embedchain import App - except ImportError: - raise ImportError("embedchain is not installed. Please install it with `pip install crewai-tools[embedchain]`") + from crewai_tools.adapters.crewai_rag_adapter import CrewAIRagAdapter - from crewai_tools.adapters.embedchain_adapter import EmbedchainAdapter + parsed_config = self._parse_config(self.config) - with portalocker.Lock("crewai-rag-tool.lock", timeout=10): - app = App.from_config(config=self.config) if self.config else App() - - self.adapter = EmbedchainAdapter( - embedchain_app=app, summarize=self.summarize + self.adapter = CrewAIRagAdapter( + collection_name="rag_tool_collection", + summarize=self.summarize, + similarity_threshold=self.similarity_threshold, + limit=self.limit, + config=parsed_config, ) return self + def _parse_config(self, config: Any) -> Any: + """Parse complex config format to extract provider-specific config. + + Raises: + ValueError: If the config format is invalid or uses unsupported providers. 
+ """ + if config is None: + return None + + if isinstance(config, dict) and "provider" in config: + return config + + if isinstance(config, dict): + if "vectordb" in config: + vectordb_config = config["vectordb"] + if isinstance(vectordb_config, dict) and "provider" in vectordb_config: + provider = vectordb_config["provider"] + provider_config = vectordb_config.get("config", {}) + + supported_providers = ["chromadb", "qdrant"] + if provider not in supported_providers: + raise ValueError( + f"Unsupported vector database provider: '{provider}'. " + f"CrewAI RAG currently supports: {', '.join(supported_providers)}." + ) + + embedding_config = config.get("embedding_model") + embedding_function = None + if embedding_config and isinstance(embedding_config, dict): + embedding_function = self._create_embedding_function( + embedding_config, provider + ) + + return self._create_provider_config( + provider, provider_config, embedding_function + ) + return None + embedding_config = config.get("embedding_model") + embedding_function = None + if embedding_config and isinstance(embedding_config, dict): + embedding_function = self._create_embedding_function( + embedding_config, "chromadb" + ) + + return self._create_provider_config("chromadb", {}, embedding_function) + return config + + @staticmethod + def _create_embedding_function(embedding_config: dict, provider: str) -> Any: + """Create embedding function for the specified vector database provider.""" + embedding_provider = embedding_config.get("provider") + embedding_model_config = embedding_config.get("config", {}).copy() + + if "model" in embedding_model_config: + embedding_model_config["model_name"] = embedding_model_config.pop("model") + + factory_config = {"provider": embedding_provider, **embedding_model_config} + + if embedding_provider == "openai" and "api_key" not in factory_config: + api_key = os.getenv("OPENAI_API_KEY") + if api_key: + factory_config["api_key"] = api_key + + print(f"Creating embedding function with config: {factory_config}") + + if provider == "chromadb": + embedding_func = get_embedding_function(factory_config) + print(f"Created embedding function: {embedding_func}") + print(f"Embedding function type: {type(embedding_func)}") + return embedding_func + + if provider == "qdrant": + chromadb_func = get_embedding_function(factory_config) + + def qdrant_embed_fn(text: str) -> list[float]: + """Embed text using ChromaDB function and convert to list of floats for Qdrant. + + Args: + text: The input text to embed. + + Returns: + A list of floats representing the embedding. 
+ """ + embeddings = chromadb_func([text]) + return embeddings[0] if embeddings and len(embeddings) > 0 else [] + + return cast(Any, qdrant_embed_fn) + + return None + + @staticmethod + def _create_provider_config( + provider: str, provider_config: dict, embedding_function: Any + ) -> Any: + """Create proper provider config object.""" + if provider == "chromadb": + from crewai.rag.chromadb.config import ChromaDBConfig + + config_kwargs = {} + if embedding_function: + config_kwargs["embedding_function"] = embedding_function + + config_kwargs.update(provider_config) + + return ChromaDBConfig(**config_kwargs) + + if provider == "qdrant": + from crewai.rag.qdrant.config import QdrantConfig + + config_kwargs = {} + if embedding_function: + config_kwargs["embedding_function"] = embedding_function + + config_kwargs.update(provider_config) + + return QdrantConfig(**config_kwargs) + + return None + def add( self, *args: Any, @@ -66,5 +195,13 @@ class RagTool(BaseTool): def _run( self, query: str, + similarity_threshold: float | None = None, + limit: int | None = None, ) -> str: - return f"Relevant Content:\n{self.adapter.query(query)}" + threshold = ( + similarity_threshold + if similarity_threshold is not None + else self.similarity_threshold + ) + result_limit = limit if limit is not None else self.limit + return f"Relevant Content:\n{self.adapter.query(query, similarity_threshold=threshold, limit=result_limit)}" diff --git a/packages/tools/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py b/packages/tools/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py index 61f5d9c8c..8601c07ee 100644 --- a/packages/tools/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py +++ b/packages/tools/src/crewai_tools/tools/scrape_element_from_website/scrape_element_from_website.py @@ -1,5 +1,5 @@ import os -from typing import Any, Optional, Type +from typing import Any import requests from crewai.tools import BaseTool @@ -7,6 +7,7 @@ from pydantic import BaseModel, Field try: from bs4 import BeautifulSoup + BEAUTIFULSOUP_AVAILABLE = True except ImportError: BEAUTIFULSOUP_AVAILABLE = False @@ -29,11 +30,11 @@ class ScrapeElementFromWebsiteToolSchema(FixedScrapeElementFromWebsiteToolSchema class ScrapeElementFromWebsiteTool(BaseTool): name: str = "Read a website content" description: str = "A tool that can be used to read a website content." 
- args_schema: Type[BaseModel] = ScrapeElementFromWebsiteToolSchema - website_url: Optional[str] = None - cookies: Optional[dict] = None - css_element: Optional[str] = None - headers: Optional[dict] = { + args_schema: type[BaseModel] = ScrapeElementFromWebsiteToolSchema + website_url: str | None = None + cookies: dict | None = None + css_element: str | None = None + headers: dict | None = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", "Accept-Language": "en-US,en;q=0.9", @@ -45,9 +46,9 @@ class ScrapeElementFromWebsiteTool(BaseTool): def __init__( self, - website_url: Optional[str] = None, - cookies: Optional[dict] = None, - css_element: Optional[str] = None, + website_url: str | None = None, + cookies: dict | None = None, + css_element: str | None = None, **kwargs, ): super().__init__(**kwargs) @@ -67,7 +68,9 @@ class ScrapeElementFromWebsiteTool(BaseTool): **kwargs: Any, ) -> Any: if not BEAUTIFULSOUP_AVAILABLE: - raise ImportError("beautifulsoup4 is not installed. Please install it with `pip install crewai-tools[beautifulsoup4]`") + raise ImportError( + "beautifulsoup4 is not installed. Please install it with `pip install crewai-tools[beautifulsoup4]`" + ) website_url = kwargs.get("website_url", self.website_url) css_element = kwargs.get("css_element", self.css_element) diff --git a/packages/tools/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py b/packages/tools/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py index 262e79a69..99ed1e3b1 100644 --- a/packages/tools/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py +++ b/packages/tools/src/crewai_tools/tools/scrape_website_tool/scrape_website_tool.py @@ -1,10 +1,12 @@ import os import re -from typing import Any, Optional, Type +from typing import Any import requests + try: from bs4 import BeautifulSoup + BEAUTIFULSOUP_AVAILABLE = True except ImportError: BEAUTIFULSOUP_AVAILABLE = False @@ -25,10 +27,10 @@ class ScrapeWebsiteToolSchema(FixedScrapeWebsiteToolSchema): class ScrapeWebsiteTool(BaseTool): name: str = "Read website content" description: str = "A tool that can be used to read a website content." - args_schema: Type[BaseModel] = ScrapeWebsiteToolSchema - website_url: Optional[str] = None - cookies: Optional[dict] = None - headers: Optional[dict] = { + args_schema: type[BaseModel] = ScrapeWebsiteToolSchema + website_url: str | None = None + cookies: dict | None = None + headers: dict | None = { "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9", "Accept-Language": "en-US,en;q=0.9", @@ -39,13 +41,15 @@ class ScrapeWebsiteTool(BaseTool): def __init__( self, - website_url: Optional[str] = None, - cookies: Optional[dict] = None, + website_url: str | None = None, + cookies: dict | None = None, **kwargs, ): super().__init__(**kwargs) if not BEAUTIFULSOUP_AVAILABLE: - raise ImportError("beautifulsoup4 is not installed. Please install it with `pip install crewai-tools[beautifulsoup4]`") + raise ImportError( + "beautifulsoup4 is not installed. 
Please install it with `pip install crewai-tools[beautifulsoup4]`" + ) if website_url is not None: self.website_url = website_url @@ -75,5 +79,4 @@ class ScrapeWebsiteTool(BaseTool): text = "The following text is scraped website content:\n\n" text += parsed.get_text(" ") text = re.sub("[ \t]+", " ", text) - text = re.sub("\\s+\n\\s+", "\n", text) - return text + return re.sub("\\s+\n\\s+", "\n", text) diff --git a/packages/tools/src/crewai_tools/tools/scrapegraph_scrape_tool/scrapegraph_scrape_tool.py b/packages/tools/src/crewai_tools/tools/scrapegraph_scrape_tool/scrapegraph_scrape_tool.py index 34f42e52e..4baef03cc 100644 --- a/packages/tools/src/crewai_tools/tools/scrapegraph_scrape_tool/scrapegraph_scrape_tool.py +++ b/packages/tools/src/crewai_tools/tools/scrapegraph_scrape_tool/scrapegraph_scrape_tool.py @@ -1,5 +1,5 @@ import os -from typing import TYPE_CHECKING, Any, Optional, Type, List +from typing import TYPE_CHECKING, Any, Optional from urllib.parse import urlparse from crewai.tools import BaseTool, EnvVar @@ -32,7 +32,7 @@ class ScrapegraphScrapeToolSchema(FixedScrapegraphScrapeToolSchema): ) @field_validator("website_url") - def validate_url(cls, v): + def validate_url(self, v): """Validate URL format""" try: result = urlparse(v) @@ -61,22 +61,26 @@ class ScrapegraphScrapeTool(BaseTool): description: str = ( "A tool that uses Scrapegraph AI to intelligently scrape website content." ) - args_schema: Type[BaseModel] = ScrapegraphScrapeToolSchema - website_url: Optional[str] = None - user_prompt: Optional[str] = None - api_key: Optional[str] = None + args_schema: type[BaseModel] = ScrapegraphScrapeToolSchema + website_url: str | None = None + user_prompt: str | None = None + api_key: str | None = None enable_logging: bool = False _client: Optional["Client"] = None - package_dependencies: List[str] = ["scrapegraph-py"] - env_vars: List[EnvVar] = [ - EnvVar(name="SCRAPEGRAPH_API_KEY", description="API key for Scrapegraph AI services", required=False), + package_dependencies: list[str] = ["scrapegraph-py"] + env_vars: list[EnvVar] = [ + EnvVar( + name="SCRAPEGRAPH_API_KEY", + description="API key for Scrapegraph AI services", + required=False, + ), ] def __init__( self, - website_url: Optional[str] = None, - user_prompt: Optional[str] = None, - api_key: Optional[str] = None, + website_url: str | None = None, + user_prompt: str | None = None, + api_key: str | None = None, enable_logging: bool = False, **kwargs, ): @@ -167,17 +171,15 @@ class ScrapegraphScrapeTool(BaseTool): try: # Make the SmartScraper request - response = self._client.smartscraper( + return self._client.smartscraper( website_url=website_url, user_prompt=user_prompt, ) - return response - except RateLimitError: raise # Re-raise rate limit errors except Exception as e: - raise RuntimeError(f"Scraping failed: {str(e)}") + raise RuntimeError(f"Scraping failed: {e!s}") finally: # Always close the client self._client.close() diff --git a/packages/tools/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py b/packages/tools/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py index 38bdab2a0..a2ab86201 100644 --- a/packages/tools/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py +++ b/packages/tools/src/crewai_tools/tools/scrapfly_scrape_website_tool/scrapfly_scrape_website_tool.py @@ -1,6 +1,6 @@ -import os import logging -from typing import Any, Dict, Literal, Optional, Type, List +import os +from typing import Any, Literal 
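A minimal sketch of the element scraper after the typing cleanup; the URL and selector are placeholders, and beautifulsoup4 must be installed or _run raises the ImportError shown above:

from crewai_tools import ScrapeElementFromWebsiteTool

tool = ScrapeElementFromWebsiteTool()
headings = tool.run(website_url="https://example.com", css_element="h1")
print(headings)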
from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, Field @@ -10,13 +10,13 @@ logger = logging.getLogger(__file__) class ScrapflyScrapeWebsiteToolSchema(BaseModel): url: str = Field(description="Webpage URL") - scrape_format: Optional[Literal["raw", "markdown", "text"]] = Field( + scrape_format: Literal["raw", "markdown", "text"] | None = Field( default="markdown", description="Webpage extraction format" ) - scrape_config: Optional[Dict[str, Any]] = Field( + scrape_config: dict[str, Any] | None = Field( default=None, description="Scrapfly request scrape config" ) - ignore_scrape_failures: Optional[bool] = Field( + ignore_scrape_failures: bool | None = Field( default=None, description="whether to ignore failures" ) @@ -26,12 +26,14 @@ class ScrapflyScrapeWebsiteTool(BaseTool): description: str = ( "Scrape a webpage url using Scrapfly and return its content as markdown or text" ) - args_schema: Type[BaseModel] = ScrapflyScrapeWebsiteToolSchema + args_schema: type[BaseModel] = ScrapflyScrapeWebsiteToolSchema api_key: str = None - scrapfly: Optional[Any] = None - package_dependencies: List[str] = ["scrapfly-sdk"] - env_vars: List[EnvVar] = [ - EnvVar(name="SCRAPFLY_API_KEY", description="API key for Scrapfly", required=True), + scrapfly: Any | None = None + package_dependencies: list[str] = ["scrapfly-sdk"] + env_vars: list[EnvVar] = [ + EnvVar( + name="SCRAPFLY_API_KEY", description="API key for Scrapfly", required=True + ), ] def __init__(self, api_key: str): @@ -57,8 +59,8 @@ class ScrapflyScrapeWebsiteTool(BaseTool): self, url: str, scrape_format: str = "markdown", - scrape_config: Optional[Dict[str, Any]] = None, - ignore_scrape_failures: Optional[bool] = None, + scrape_config: dict[str, Any] | None = None, + ignore_scrape_failures: bool | None = None, ): from scrapfly import ScrapeApiResponse, ScrapeConfig @@ -72,5 +74,4 @@ class ScrapflyScrapeWebsiteTool(BaseTool): if ignore_scrape_failures: logger.error(f"Error fetching data from {url}, exception: {e}") return None - else: - raise e + raise e diff --git a/packages/tools/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py b/packages/tools/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py index 5f7365c8a..48a8e913a 100644 --- a/packages/tools/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py +++ b/packages/tools/src/crewai_tools/tools/selenium_scraping_tool/selenium_scraping_tool.py @@ -1,6 +1,6 @@ import re import time -from typing import Any, Optional, Type, List +from typing import Any from urllib.parse import urlparse from crewai.tools import BaseTool @@ -24,7 +24,7 @@ class SeleniumScrapingToolSchema(FixedSeleniumScrapingToolSchema): ) @field_validator("website_url") - def validate_website_url(cls, v): + def validate_website_url(self, v): if not v: raise ValueError("Website URL cannot be empty") @@ -39,7 +39,7 @@ class SeleniumScrapingToolSchema(FixedSeleniumScrapingToolSchema): if not all([result.scheme, result.netloc]): raise ValueError("Invalid URL format") except Exception as e: - raise ValueError(f"Invalid URL: {str(e)}") + raise ValueError(f"Invalid URL: {e!s}") if re.search(r"\s", v): raise ValueError("URL cannot contain whitespace") @@ -50,21 +50,21 @@ class SeleniumScrapingToolSchema(FixedSeleniumScrapingToolSchema): class SeleniumScrapingTool(BaseTool): name: str = "Read a website content" description: str = "A tool that can be used to read a website content." 
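A sketch of the Scrapfly tool after its signature cleanup; the API key and URL are placeholders, and the scrapfly-sdk extra must be installed:

from crewai_tools import ScrapflyScrapeWebsiteTool

tool = ScrapflyScrapeWebsiteTool(api_key="scp-live-...")  # placeholder key
content = tool.run(
    url="https://example.com",
    scrape_format="markdown",      # "raw", "markdown", or "text"
    ignore_scrape_failures=True,   # log the error and return None instead of raising
)
print(content)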
- args_schema: Type[BaseModel] = SeleniumScrapingToolSchema - website_url: Optional[str] = None - driver: Optional[Any] = None - cookie: Optional[dict] = None - wait_time: Optional[int] = 3 - css_element: Optional[str] = None - return_html: Optional[bool] = False - _by: Optional[Any] = None - package_dependencies: List[str] = ["selenium", "webdriver-manager"] + args_schema: type[BaseModel] = SeleniumScrapingToolSchema + website_url: str | None = None + driver: Any | None = None + cookie: dict | None = None + wait_time: int | None = 3 + css_element: str | None = None + return_html: bool | None = False + _by: Any | None = None + package_dependencies: list[str] = ["selenium", "webdriver-manager"] def __init__( self, - website_url: Optional[str] = None, - cookie: Optional[dict] = None, - css_element: Optional[str] = None, + website_url: str | None = None, + cookie: dict | None = None, + css_element: str | None = None, **kwargs, ): super().__init__(**kwargs) @@ -92,15 +92,15 @@ class SeleniumScrapingTool(BaseTool): "`selenium` and `webdriver-manager` package not found, please run `uv add selenium webdriver-manager`" ) - if 'driver' not in kwargs: - if 'options' not in kwargs: + if "driver" not in kwargs: + if "options" not in kwargs: options: Options = Options() options.add_argument("--headless") else: - options = kwargs['options'] + options = kwargs["options"] self.driver = webdriver.Chrome(options=options) else: - self.driver = kwargs['driver'] + self.driver = kwargs["driver"] self._by = By if cookie is not None: @@ -130,7 +130,7 @@ class SeleniumScrapingTool(BaseTool): content = self._get_content(css_element, return_html) return "\n".join(content) except Exception as e: - return f"Error scraping website: {str(e)}" + return f"Error scraping website: {e!s}" finally: self.driver.close() diff --git a/packages/tools/src/crewai_tools/tools/serpapi_tool/serpapi_base_tool.py b/packages/tools/src/crewai_tools/tools/serpapi_tool/serpapi_base_tool.py index aa73d63d5..e554f3f39 100644 --- a/packages/tools/src/crewai_tools/tools/serpapi_tool/serpapi_base_tool.py +++ b/packages/tools/src/crewai_tools/tools/serpapi_tool/serpapi_base_tool.py @@ -1,6 +1,6 @@ import os import re -from typing import Any, Optional, Union, List +from typing import Any from crewai.tools import BaseTool, EnvVar @@ -8,12 +8,16 @@ from crewai.tools import BaseTool, EnvVar class SerpApiBaseTool(BaseTool): """Base class for SerpApi functionality with shared capabilities.""" - package_dependencies: List[str] = ["serpapi"] - env_vars: List[EnvVar] = [ - EnvVar(name="SERPAPI_API_KEY", description="API key for SerpApi searches", required=True), + package_dependencies: list[str] = ["serpapi"] + env_vars: list[EnvVar] = [ + EnvVar( + name="SERPAPI_API_KEY", + description="API key for SerpApi searches", + required=True, + ), ] - client: Optional[Any] = None + client: Any | None = None def __init__(self, **kwargs): super().__init__(**kwargs) @@ -41,7 +45,7 @@ class SerpApiBaseTool(BaseTool): ) self.client = Client(api_key=api_key) - def _omit_fields(self, data: Union[dict, list], omit_patterns: list[str]) -> None: + def _omit_fields(self, data: dict | list, omit_patterns: list[str]) -> None: if isinstance(data, dict): for field in list(data.keys()): if any(re.compile(p).match(field) for p in omit_patterns): diff --git a/packages/tools/src/crewai_tools/tools/serpapi_tool/serpapi_google_search_tool.py b/packages/tools/src/crewai_tools/tools/serpapi_tool/serpapi_google_search_tool.py index 9f11611ab..1fe2ef1a8 100644 --- 
a/packages/tools/src/crewai_tools/tools/serpapi_tool/serpapi_google_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/serpapi_tool/serpapi_google_search_tool.py @@ -1,7 +1,7 @@ -from typing import Any, Optional, Type +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field -import re -from pydantic import BaseModel, Field, ConfigDict from .serpapi_base_tool import SerpApiBaseTool try: @@ -16,7 +16,7 @@ class SerpApiGoogleSearchToolSchema(BaseModel): search_query: str = Field( ..., description="Mandatory search query you want to use to Google search." ) - location: Optional[str] = Field( + location: str | None = Field( None, description="Location you want the search to be performed in." ) @@ -29,7 +29,7 @@ class SerpApiGoogleSearchTool(SerpApiBaseTool): description: str = ( "A tool to perform to perform a Google search with a search_query." ) - args_schema: Type[BaseModel] = SerpApiGoogleSearchToolSchema + args_schema: type[BaseModel] = SerpApiGoogleSearchToolSchema def _run( self, @@ -57,4 +57,4 @@ class SerpApiGoogleSearchTool(SerpApiBaseTool): return results except HTTPError as e: - return f"An error occurred: {str(e)}. Some parameters may be invalid." + return f"An error occurred: {e!s}. Some parameters may be invalid." diff --git a/packages/tools/src/crewai_tools/tools/serpapi_tool/serpapi_google_shopping_tool.py b/packages/tools/src/crewai_tools/tools/serpapi_tool/serpapi_google_shopping_tool.py index 428bb6b52..6fe912724 100644 --- a/packages/tools/src/crewai_tools/tools/serpapi_tool/serpapi_google_shopping_tool.py +++ b/packages/tools/src/crewai_tools/tools/serpapi_tool/serpapi_google_shopping_tool.py @@ -1,8 +1,8 @@ -from typing import Any, Optional, Type +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field -from pydantic import BaseModel, Field from .serpapi_base_tool import SerpApiBaseTool -from pydantic import ConfigDict try: from serpapi import HTTPError @@ -16,7 +16,7 @@ class SerpApiGoogleShoppingToolSchema(BaseModel): search_query: str = Field( ..., description="Mandatory search query you want to use to Google shopping." ) - location: Optional[str] = Field( + location: str | None = Field( None, description="Location you want the search to be performed in." ) @@ -29,7 +29,7 @@ class SerpApiGoogleShoppingTool(SerpApiBaseTool): description: str = ( "A tool to perform search on Google shopping with a search_query." ) - args_schema: Type[BaseModel] = SerpApiGoogleShoppingToolSchema + args_schema: type[BaseModel] = SerpApiGoogleShoppingToolSchema def _run( self, @@ -57,4 +57,4 @@ class SerpApiGoogleShoppingTool(SerpApiBaseTool): return results except HTTPError as e: - return f"An error occurred: {str(e)}. Some parameters may be invalid." + return f"An error occurred: {e!s}. Some parameters may be invalid." diff --git a/packages/tools/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py b/packages/tools/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py index 23f15dd92..4cc26ec43 100644 --- a/packages/tools/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py +++ b/packages/tools/src/crewai_tools/tools/serper_dev_tool/serper_dev_tool.py @@ -2,7 +2,7 @@ import datetime import json import logging import os -from typing import Any, List, Optional, Type +from typing import Any import requests from crewai.tools import BaseTool, EnvVar @@ -37,15 +37,15 @@ class SerperDevTool(BaseTool): "A tool that can be used to search the internet with a search_query. 
" "Supports different search types: 'search' (default), 'news'" ) - args_schema: Type[BaseModel] = SerperDevToolSchema + args_schema: type[BaseModel] = SerperDevToolSchema base_url: str = "https://google.serper.dev" n_results: int = 10 save_file: bool = False search_type: str = "search" - country: Optional[str] = "" - location: Optional[str] = "" - locale: Optional[str] = "" - env_vars: List[EnvVar] = [ + country: str | None = "" + location: str | None = "" + locale: str | None = "" + env_vars: list[EnvVar] = [ EnvVar(name="SERPER_API_KEY", description="API key for Serper", required=True), ] diff --git a/packages/tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py b/packages/tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py index cefb431f4..060979c50 100644 --- a/packages/tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py +++ b/packages/tools/src/crewai_tools/tools/serper_scrape_website_tool/serper_scrape_website_tool.py @@ -1,17 +1,18 @@ -from crewai.tools import BaseTool, EnvVar -from typing import Type, List -from pydantic import BaseModel, Field -import requests import json import os +import requests +from crewai.tools import BaseTool, EnvVar +from pydantic import BaseModel, Field + class SerperScrapeWebsiteInput(BaseModel): """Input schema for SerperScrapeWebsite.""" + url: str = Field(..., description="The URL of the website to scrape") include_markdown: bool = Field( - default=True, - description="Whether to include markdown formatting in the scraped content" + default=True, + description="Whether to include markdown formatting in the scraped content", ) @@ -22,59 +23,53 @@ class SerperScrapeWebsiteTool(BaseTool): "This tool can extract clean, readable content from any website URL, " "optionally including markdown formatting for better structure." ) - args_schema: Type[BaseModel] = SerperScrapeWebsiteInput - env_vars: List[EnvVar] = [ + args_schema: type[BaseModel] = SerperScrapeWebsiteInput + env_vars: list[EnvVar] = [ EnvVar(name="SERPER_API_KEY", description="API key for Serper", required=True), ] def _run(self, url: str, include_markdown: bool = True) -> str: """ Scrape website content using Serper API. 
- + Args: url: The URL to scrape include_markdown: Whether to include markdown formatting - + Returns: Scraped website content as a string """ try: # Serper API endpoint api_url = "https://scrape.serper.dev" - + # Get API key from environment variable for security - api_key = os.getenv('SERPER_API_KEY') - + api_key = os.getenv("SERPER_API_KEY") + # Prepare the payload - payload = json.dumps({ - "url": url, - "includeMarkdown": include_markdown - }) - + payload = json.dumps({"url": url, "includeMarkdown": include_markdown}) + # Set headers - headers = { - 'X-API-KEY': api_key, - 'Content-Type': 'application/json' - } - + headers = {"X-API-KEY": api_key, "Content-Type": "application/json"} + # Make the API request response = requests.post(api_url, headers=headers, data=payload) - + # Check if request was successful if response.status_code == 200: result = response.json() - + # Extract the scraped content - if 'text' in result: - return result['text'] - else: - return f"Successfully scraped {url}, but no text content found in response: {response.text}" - else: - return f"Error scraping {url}: HTTP {response.status_code} - {response.text}" - + if "text" in result: + return result["text"] + return f"Successfully scraped {url}, but no text content found in response: {response.text}" + return ( + f"Error scraping {url}: HTTP {response.status_code} - {response.text}" + ) + except requests.exceptions.RequestException as e: - return f"Network error while scraping {url}: {str(e)}" + return f"Network error while scraping {url}: {e!s}" except json.JSONDecodeError as e: - return f"Error parsing JSON response while scraping {url}: {str(e)}" + return f"Error parsing JSON response while scraping {url}: {e!s}" except Exception as e: - return f"Unexpected error while scraping {url}: {str(e)}" + return f"Unexpected error while scraping {url}: {e!s}" diff --git a/packages/tools/src/crewai_tools/tools/serply_api_tool/serply_job_search_tool.py b/packages/tools/src/crewai_tools/tools/serply_api_tool/serply_job_search_tool.py index 9d99fa01b..d3418fe0b 100644 --- a/packages/tools/src/crewai_tools/tools/serply_api_tool/serply_job_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/serply_api_tool/serply_job_search_tool.py @@ -1,12 +1,10 @@ import os -from typing import Any, List, Optional, Type from urllib.parse import urlencode import requests -from pydantic import BaseModel, Field - from crewai.tools import EnvVar from crewai_tools.tools.rag.rag_tool import RagTool +from pydantic import BaseModel, Field class SerplyJobSearchToolSchema(BaseModel): @@ -23,16 +21,20 @@ class SerplyJobSearchTool(RagTool): description: str = ( "A tool to perform to perform a job search in the US with a search_query." ) - args_schema: Type[BaseModel] = SerplyJobSearchToolSchema + args_schema: type[BaseModel] = SerplyJobSearchToolSchema request_url: str = "https://api.serply.io/v1/job/search/" - proxy_location: Optional[str] = "US" + proxy_location: str | None = "US" """ proxy_location: (str): Where to get jobs, specifically for a specific country results. 
- Currently only supports US """ - headers: Optional[dict] = {} - env_vars: List[EnvVar] = [ - EnvVar(name="SERPLY_API_KEY", description="API key for Serply services", required=True), + headers: dict | None = {} + env_vars: list[EnvVar] = [ + EnvVar( + name="SERPLY_API_KEY", + description="API key for Serply services", + required=True, + ), ] def __init__(self, **kwargs): @@ -45,8 +47,8 @@ class SerplyJobSearchTool(RagTool): def _run( self, - query: Optional[str] = None, - search_query: Optional[str] = None, + query: str | None = None, + search_query: str | None = None, ) -> str: query_payload = {} @@ -75,7 +77,7 @@ class SerplyJobSearchTool(RagTool): f"Employer: {job['employer']}", f"Location: {job['location']}", f"Link: {job['link']}", - f"""Highest: {', '.join([h for h in job['highlights']])}""", + f"""Highest: {", ".join([h for h in job["highlights"]])}""", f"Is Remote: {job['is_remote']}", f"Is Hybrid: {job['is_remote']}", "---", diff --git a/packages/tools/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py b/packages/tools/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py index 5a2b27798..36ace28a1 100644 --- a/packages/tools/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/serply_api_tool/serply_news_search_tool.py @@ -1,5 +1,5 @@ import os -from typing import Any, List, Optional, Type +from typing import Any from urllib.parse import urlencode import requests @@ -18,17 +18,21 @@ class SerplyNewsSearchToolSchema(BaseModel): class SerplyNewsSearchTool(BaseTool): name: str = "News Search" description: str = "A tool to perform News article search with a search_query." - args_schema: Type[BaseModel] = SerplyNewsSearchToolSchema + args_schema: type[BaseModel] = SerplyNewsSearchToolSchema search_url: str = "https://api.serply.io/v1/news/" - proxy_location: Optional[str] = "US" - headers: Optional[dict] = {} - limit: Optional[int] = 10 - env_vars: List[EnvVar] = [ - EnvVar(name="SERPLY_API_KEY", description="API key for Serply services", required=True), + proxy_location: str | None = "US" + headers: dict | None = {} + limit: int | None = 10 + env_vars: list[EnvVar] = [ + EnvVar( + name="SERPLY_API_KEY", + description="API key for Serply services", + required=True, + ), ] def __init__( - self, limit: Optional[int] = 10, proxy_location: Optional[str] = "US", **kwargs + self, limit: int | None = 10, proxy_location: str | None = "US", **kwargs ): """ param: limit (int): The maximum number of results to return [10-100, defaults to 10] @@ -85,5 +89,4 @@ class SerplyNewsSearchTool(BaseTool): content = "\n".join(string) return f"\nSearch results: {content}\n" - else: - return results + return results diff --git a/packages/tools/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py b/packages/tools/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py index c49734c56..8c8bdf6c6 100644 --- a/packages/tools/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/serply_api_tool/serply_scholar_search_tool.py @@ -1,5 +1,5 @@ import os -from typing import Any, List, Optional, Type +from typing import Any from urllib.parse import urlencode import requests @@ -21,16 +21,20 @@ class SerplyScholarSearchTool(BaseTool): description: str = ( "A tool to perform scholarly literature search with a search_query." 
) - args_schema: Type[BaseModel] = SerplyScholarSearchToolSchema + args_schema: type[BaseModel] = SerplyScholarSearchToolSchema search_url: str = "https://api.serply.io/v1/scholar/" - hl: Optional[str] = "us" - proxy_location: Optional[str] = "US" - headers: Optional[dict] = {} - env_vars: List[EnvVar] = [ - EnvVar(name="SERPLY_API_KEY", description="API key for Serply services", required=True), + hl: str | None = "us" + proxy_location: str | None = "US" + headers: dict | None = {} + env_vars: list[EnvVar] = [ + EnvVar( + name="SERPLY_API_KEY", + description="API key for Serply services", + required=True, + ), ] - def __init__(self, hl: str = "us", proxy_location: Optional[str] = "US", **kwargs): + def __init__(self, hl: str = "us", proxy_location: str | None = "US", **kwargs): """ param: hl (str): host Language code to display results in (reference https://developers.google.com/custom-search/docs/xml_results?hl=en#wsInterfaceLanguages) diff --git a/packages/tools/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py b/packages/tools/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py index 6801f4065..395b0d4c5 100644 --- a/packages/tools/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/serply_api_tool/serply_web_search_tool.py @@ -1,5 +1,5 @@ import os -from typing import Any, List, Optional, Type +from typing import Any from urllib.parse import urlencode import requests @@ -18,16 +18,20 @@ class SerplyWebSearchToolSchema(BaseModel): class SerplyWebSearchTool(BaseTool): name: str = "Google Search" description: str = "A tool to perform Google search with a search_query." - args_schema: Type[BaseModel] = SerplyWebSearchToolSchema + args_schema: type[BaseModel] = SerplyWebSearchToolSchema search_url: str = "https://api.serply.io/v1/search/" - hl: Optional[str] = "us" - limit: Optional[int] = 10 - device_type: Optional[str] = "desktop" - proxy_location: Optional[str] = "US" - query_payload: Optional[dict] = {} - headers: Optional[dict] = {} - env_vars: List[EnvVar] = [ - EnvVar(name="SERPLY_API_KEY", description="API key for Serply services", required=True), + hl: str | None = "us" + limit: int | None = 10 + device_type: str | None = "desktop" + proxy_location: str | None = "US" + query_payload: dict | None = {} + headers: dict | None = {} + env_vars: list[EnvVar] = [ + EnvVar( + name="SERPLY_API_KEY", + description="API key for Serply services", + required=True, + ), ] def __init__( @@ -100,5 +104,4 @@ class SerplyWebSearchTool(BaseTool): content = "\n".join(string) return f"\nSearch results: {content}\n" - else: - return results + return results diff --git a/packages/tools/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py b/packages/tools/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py index fa2404f75..fc5438be7 100644 --- a/packages/tools/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py +++ b/packages/tools/src/crewai_tools/tools/serply_api_tool/serply_webpage_to_markdown_tool.py @@ -1,11 +1,9 @@ import os -from typing import Any, List, Optional, Type import requests from crewai.tools import EnvVar -from pydantic import BaseModel, Field - from crewai_tools.tools.rag.rag_tool import RagTool +from pydantic import BaseModel, Field class SerplyWebpageToMarkdownToolSchema(BaseModel): @@ -19,18 +17,20 @@ class SerplyWebpageToMarkdownToolSchema(BaseModel): class SerplyWebpageToMarkdownTool(RagTool): name: str = "Webpage to Markdown" - 
description: str = ( - "A tool to perform convert a webpage to markdown to make it easier for LLMs to understand" - ) - args_schema: Type[BaseModel] = SerplyWebpageToMarkdownToolSchema + description: str = "A tool to perform convert a webpage to markdown to make it easier for LLMs to understand" + args_schema: type[BaseModel] = SerplyWebpageToMarkdownToolSchema request_url: str = "https://api.serply.io/v1/request" - proxy_location: Optional[str] = "US" - headers: Optional[dict] = {} - env_vars: List[EnvVar] = [ - EnvVar(name="SERPLY_API_KEY", description="API key for Serply services", required=True), + proxy_location: str | None = "US" + headers: dict | None = {} + env_vars: list[EnvVar] = [ + EnvVar( + name="SERPLY_API_KEY", + description="API key for Serply services", + required=True, + ), ] - def __init__(self, proxy_location: Optional[str] = "US", **kwargs): + def __init__(self, proxy_location: str | None = "US", **kwargs): """ proxy_location: (str): Where to perform the search, specifically for a specific country results. ['US', 'CA', 'IE', 'GB', 'FR', 'DE', 'SE', 'IN', 'JP', 'KR', 'SG', 'AU', 'BR'] (defaults to US) diff --git a/packages/tools/src/crewai_tools/tools/singlestore_search_tool/singlestore_search_tool.py b/packages/tools/src/crewai_tools/tools/singlestore_search_tool/singlestore_search_tool.py index 4c8d768a3..24f3618cb 100644 --- a/packages/tools/src/crewai_tools/tools/singlestore_search_tool/singlestore_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/singlestore_search_tool/singlestore_search_tool.py @@ -1,4 +1,5 @@ -from typing import Any, Callable, Dict, List, Optional, Type +from collections.abc import Callable +from typing import Any from crewai.tools import BaseTool, EnvVar from pydantic import BaseModel, Field @@ -40,10 +41,10 @@ class SingleStoreSearchTool(BaseTool): description: str = ( "A tool that can be used to semantic search a query from a database." 
) - args_schema: Type[BaseModel] = SingleStoreSearchToolSchema + args_schema: type[BaseModel] = SingleStoreSearchToolSchema - package_dependencies: List[str] = ["singlestoredb", "SQLAlchemy"] - env_vars: List[EnvVar] = [ + package_dependencies: list[str] = ["singlestoredb", "SQLAlchemy"] + env_vars: list[EnvVar] = [ EnvVar( name="SINGLESTOREDB_URL", description="A comprehensive URL string that can encapsulate host, port," @@ -112,57 +113,57 @@ class SingleStoreSearchTool(BaseTool): ] connection_args: dict = {} - connection_pool: Optional[Any] = None + connection_pool: Any | None = None def __init__( self, - tables: List[str] = [], + tables: list[str] | None = None, # Basic connection parameters - host: Optional[str] = None, - user: Optional[str] = None, - password: Optional[str] = None, - port: Optional[int] = None, - database: Optional[str] = None, - driver: Optional[str] = None, + host: str | None = None, + user: str | None = None, + password: str | None = None, + port: int | None = None, + database: str | None = None, + driver: str | None = None, # Connection behavior options - pure_python: Optional[bool] = None, - local_infile: Optional[bool] = None, - charset: Optional[str] = None, + pure_python: bool | None = None, + local_infile: bool | None = None, + charset: str | None = None, # SSL/TLS configuration - ssl_key: Optional[str] = None, - ssl_cert: Optional[str] = None, - ssl_ca: Optional[str] = None, - ssl_disabled: Optional[bool] = None, - ssl_cipher: Optional[str] = None, - ssl_verify_cert: Optional[bool] = None, - tls_sni_servername: Optional[str] = None, - ssl_verify_identity: Optional[bool] = None, + ssl_key: str | None = None, + ssl_cert: str | None = None, + ssl_ca: str | None = None, + ssl_disabled: bool | None = None, + ssl_cipher: str | None = None, + ssl_verify_cert: bool | None = None, + tls_sni_servername: str | None = None, + ssl_verify_identity: bool | None = None, # Advanced connection options - conv: Optional[Dict[int, Callable[..., Any]]] = None, - credential_type: Optional[str] = None, - autocommit: Optional[bool] = None, + conv: dict[int, Callable[..., Any]] | None = None, + credential_type: str | None = None, + autocommit: bool | None = None, # Result formatting options - results_type: Optional[str] = None, - buffered: Optional[bool] = None, - results_format: Optional[str] = None, - program_name: Optional[str] = None, - conn_attrs: Optional[Dict[str, str]] = {}, + results_type: str | None = None, + buffered: bool | None = None, + results_format: str | None = None, + program_name: str | None = None, + conn_attrs: dict[str, str] | None = None, # Query execution options - multi_statements: Optional[bool] = None, - client_found_rows: Optional[bool] = None, - connect_timeout: Optional[int] = None, + multi_statements: bool | None = None, + client_found_rows: bool | None = None, + connect_timeout: int | None = None, # Data type handling - nan_as_null: Optional[bool] = None, - inf_as_null: Optional[bool] = None, - encoding_errors: Optional[str] = None, - track_env: Optional[bool] = None, - enable_extended_data_types: Optional[bool] = None, - vector_data_format: Optional[str] = None, - parse_json: Optional[bool] = None, + nan_as_null: bool | None = None, + inf_as_null: bool | None = None, + encoding_errors: str | None = None, + track_env: bool | None = None, + enable_extended_data_types: bool | None = None, + vector_data_format: str | None = None, + parse_json: bool | None = None, # Connection pool configuration - pool_size: Optional[int] = 5, - max_overflow: 
Optional[int] = 10, - timeout: Optional[float] = 30, + pool_size: int | None = 5, + max_overflow: int | None = 10, + timeout: float | None = 30, **kwargs, ): """Initialize the SingleStore search tool. @@ -180,6 +181,10 @@ class SingleStoreSearchTool(BaseTool): **kwargs: Additional arguments passed to the parent class """ + if conn_attrs is None: + conn_attrs = {} + if tables is None: + tables = [] if not SINGLSTORE_AVAILABLE: import click @@ -257,9 +262,9 @@ class SingleStoreSearchTool(BaseTool): self.connection_args["conn_attrs"] = dict() # Add tool identification to connection attributes - self.connection_args["conn_attrs"][ - "_connector_name" - ] = "crewAI SingleStore Tool" + self.connection_args["conn_attrs"]["_connector_name"] = ( + "crewAI SingleStore Tool" + ) self.connection_args["conn_attrs"]["_connector_version"] = "1.0" # Initialize connection pool for efficient connection management @@ -273,7 +278,7 @@ class SingleStoreSearchTool(BaseTool): # Validate database schema and initialize table information self._initialize_tables(tables) - def _initialize_tables(self, tables: List[str]) -> None: + def _initialize_tables(self, tables: list[str]) -> None: """Initialize and validate the tables that this tool will work with. Args: @@ -325,7 +330,7 @@ class SingleStoreSearchTool(BaseTool): ) self._generate_description() - def _get_connection(self) -> Optional[Any]: + def _get_connection(self) -> Any | None: """Get a connection from the connection pool. Returns: @@ -335,13 +340,12 @@ class SingleStoreSearchTool(BaseTool): Exception: If connection cannot be established """ try: - conn = self.connection_pool.connect() - return conn + return self.connection_pool.connect() except Exception: # Re-raise the exception to be handled by the caller raise - def _create_connection(self) -> Optional[Any]: + def _create_connection(self) -> Any | None: """Create a new SingleStore connection. 
This method is used by the connection pool to create new connections @@ -354,8 +358,7 @@ class SingleStoreSearchTool(BaseTool): Exception: If connection cannot be created """ try: - conn = connect(**self.connection_args) - return conn + return connect(**self.connection_args) except Exception: # Re-raise the exception to be handled by the caller raise @@ -379,7 +382,7 @@ class SingleStoreSearchTool(BaseTool): query_lower = search_query.strip().lower() # Allow only SELECT and SHOW statements - if not (query_lower.startswith("select") or query_lower.startswith("show")): + if not (query_lower.startswith(("select", "show"))): return ( False, "Only SELECT and SHOW queries are supported for security reasons.", diff --git a/packages/tools/src/crewai_tools/tools/snowflake_search_tool/__init__.py b/packages/tools/src/crewai_tools/tools/snowflake_search_tool/__init__.py index abc1a45f5..3b695dd7d 100644 --- a/packages/tools/src/crewai_tools/tools/snowflake_search_tool/__init__.py +++ b/packages/tools/src/crewai_tools/tools/snowflake_search_tool/__init__.py @@ -5,7 +5,7 @@ from .snowflake_search_tool import ( ) __all__ = [ + "SnowflakeConfig", "SnowflakeSearchTool", "SnowflakeSearchToolInput", - "SnowflakeConfig", ] diff --git a/packages/tools/src/crewai_tools/tools/snowflake_search_tool/snowflake_search_tool.py b/packages/tools/src/crewai_tools/tools/snowflake_search_tool/snowflake_search_tool.py index a4cd21044..9c8ceb987 100644 --- a/packages/tools/src/crewai_tools/tools/snowflake_search_tool/snowflake_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/snowflake_search_tool/snowflake_search_tool.py @@ -1,7 +1,7 @@ import asyncio import logging from concurrent.futures import ThreadPoolExecutor -from typing import TYPE_CHECKING, Any, Dict, List, Optional, Type +from typing import TYPE_CHECKING, Any from crewai.tools.base_tool import BaseTool from pydantic import BaseModel, ConfigDict, Field, SecretStr @@ -36,15 +36,13 @@ class SnowflakeConfig(BaseModel): ..., description="Snowflake account identifier", pattern=r"^[a-zA-Z0-9\-_]+$" ) user: str = Field(..., description="Snowflake username") - password: Optional[SecretStr] = Field(None, description="Snowflake password") - private_key_path: Optional[str] = Field( - None, description="Path to private key file" - ) - warehouse: Optional[str] = Field(None, description="Snowflake warehouse") - database: Optional[str] = Field(None, description="Default database") - snowflake_schema: Optional[str] = Field(None, description="Default schema") - role: Optional[str] = Field(None, description="Snowflake role") - session_parameters: Optional[Dict[str, Any]] = Field( + password: SecretStr | None = Field(None, description="Snowflake password") + private_key_path: str | None = Field(None, description="Path to private key file") + warehouse: str | None = Field(None, description="Snowflake warehouse") + database: str | None = Field(None, description="Default database") + snowflake_schema: str | None = Field(None, description="Default schema") + role: str | None = Field(None, description="Snowflake role") + session_parameters: dict[str, Any] | None = Field( default_factory=dict, description="Session parameters" ) @@ -63,9 +61,9 @@ class SnowflakeSearchToolInput(BaseModel): model_config = ConfigDict(protected_namespaces=()) query: str = Field(..., description="SQL query or semantic search query to execute") - database: Optional[str] = Field(None, description="Override default database") - snowflake_schema: Optional[str] = Field(None, description="Override default 
schema") - timeout: Optional[int] = Field(300, description="Query timeout in seconds") + database: str | None = Field(None, description="Override default database") + snowflake_schema: str | None = Field(None, description="Override default schema") + timeout: int | None = Field(300, description="Query timeout in seconds") class SnowflakeSearchTool(BaseTool): @@ -76,7 +74,7 @@ class SnowflakeSearchTool(BaseTool): "Execute SQL queries or semantic search on Snowflake data warehouse. " "Supports both raw SQL and natural language queries." ) - args_schema: Type[BaseModel] = SnowflakeSearchToolInput + args_schema: type[BaseModel] = SnowflakeSearchToolInput # Define Pydantic fields config: SnowflakeConfig = Field( @@ -95,11 +93,15 @@ class SnowflakeSearchTool(BaseTool): arbitrary_types_allowed=True, validate_assignment=True, frozen=False ) - _connection_pool: Optional[List["SnowflakeConnection"]] = None - _pool_lock: Optional[asyncio.Lock] = None - _thread_pool: Optional[ThreadPoolExecutor] = None + _connection_pool: list["SnowflakeConnection"] | None = None + _pool_lock: asyncio.Lock | None = None + _thread_pool: ThreadPoolExecutor | None = None _model_rebuilt: bool = False - package_dependencies: List[str] = ["snowflake-connector-python", "snowflake-sqlalchemy", "cryptography"] + package_dependencies: list[str] = [ + "snowflake-connector-python", + "snowflake-sqlalchemy", + "cryptography", + ] def __init__(self, **data): """Initialize SnowflakeSearchTool.""" @@ -184,7 +186,7 @@ class SnowflakeSearchTool(BaseTool): async def _execute_query( self, query: str, timeout: int = 300 - ) -> List[Dict[str, Any]]: + ) -> list[dict[str, Any]]: """Execute a query with retries and return results.""" if self.enable_caching: @@ -204,7 +206,10 @@ class SnowflakeSearchTool(BaseTool): return [] columns = [col[0] for col in cursor.description] - results = [dict(zip(columns, row)) for row in cursor.fetchall()] + results = [ + dict(zip(columns, row, strict=False)) + for row in cursor.fetchall() + ] if self.enable_caching: _query_cache[self._get_cache_key(query, timeout)] = results @@ -218,14 +223,15 @@ class SnowflakeSearchTool(BaseTool): if attempt == self.max_retries - 1: raise await asyncio.sleep(self.retry_delay * (2**attempt)) - logger.warning(f"Query failed, attempt {attempt + 1}: {str(e)}") + logger.warning(f"Query failed, attempt {attempt + 1}: {e!s}") continue + return None async def _run( self, query: str, - database: Optional[str] = None, - snowflake_schema: Optional[str] = None, + database: str | None = None, + snowflake_schema: str | None = None, timeout: int = 300, **kwargs: Any, ) -> Any: @@ -238,10 +244,9 @@ class SnowflakeSearchTool(BaseTool): if snowflake_schema: await self._execute_query(f"USE SCHEMA {snowflake_schema}") - results = await self._execute_query(query, timeout) - return results + return await self._execute_query(query, timeout) except Exception as e: - logger.error(f"Error executing query: {str(e)}") + logger.error(f"Error executing query: {e!s}") raise def __del__(self): diff --git a/packages/tools/src/crewai_tools/tools/spider_tool/spider_tool.py b/packages/tools/src/crewai_tools/tools/spider_tool/spider_tool.py index 3aee6ef88..137b54d26 100644 --- a/packages/tools/src/crewai_tools/tools/spider_tool/spider_tool.py +++ b/packages/tools/src/crewai_tools/tools/spider_tool/spider_tool.py @@ -1,5 +1,5 @@ import logging -from typing import Any, Dict, Literal, Optional, Type, List +from typing import Any, Literal from urllib.parse import unquote, urlparse from crewai.tools import 
BaseTool, EnvVar @@ -46,23 +46,25 @@ class SpiderTool(BaseTool): description: str = ( "A tool to scrape or crawl a website and return LLM-ready content." ) - args_schema: Type[BaseModel] = SpiderToolSchema - custom_params: Optional[Dict[str, Any]] = None - website_url: Optional[str] = None - api_key: Optional[str] = None + args_schema: type[BaseModel] = SpiderToolSchema + custom_params: dict[str, Any] | None = None + website_url: str | None = None + api_key: str | None = None spider: Any = None log_failures: bool = True config: SpiderToolConfig = SpiderToolConfig() - package_dependencies: List[str] = ["spider-client"] - env_vars: List[EnvVar] = [ - EnvVar(name="SPIDER_API_KEY", description="API key for Spider.cloud", required=True), + package_dependencies: list[str] = ["spider-client"] + env_vars: list[EnvVar] = [ + EnvVar( + name="SPIDER_API_KEY", description="API key for Spider.cloud", required=True + ), ] def __init__( self, - api_key: Optional[str] = None, - website_url: Optional[str] = None, - custom_params: Optional[Dict[str, Any]] = None, + api_key: str | None = None, + website_url: str | None = None, + custom_params: dict[str, Any] | None = None, log_failures: bool = True, **kwargs, ): @@ -135,7 +137,7 @@ class SpiderTool(BaseTool): self, website_url: str, mode: Literal["scrape", "crawl"] = "scrape", - ) -> Optional[str]: + ) -> str | None: """Execute the spider tool to scrape or crawl the specified website. Args: @@ -191,24 +193,24 @@ class SpiderTool(BaseTool): except ValueError as ve: if self.log_failures: - logger.error(f"Validation error for URL {url}: {str(ve)}") + logger.error(f"Validation error for URL {url}: {ve!s}") return None raise ve except ImportError as ie: - logger.error(f"Spider client import error: {str(ie)}") + logger.error(f"Spider client import error: {ie!s}") raise ie except ConnectionError as ce: if self.log_failures: - logger.error(f"Connection error while accessing {url}: {str(ce)}") + logger.error(f"Connection error while accessing {url}: {ce!s}") return None raise ce except Exception as e: if self.log_failures: logger.error( - f"Unexpected error during {mode} operation on {url}: {str(e)}" + f"Unexpected error during {mode} operation on {url}: {e!s}" ) return None raise e diff --git a/packages/tools/src/crewai_tools/tools/stagehand_tool/stagehand_tool.py b/packages/tools/src/crewai_tools/tools/stagehand_tool/stagehand_tool.py index d3a61f914..c33c3ce90 100644 --- a/packages/tools/src/crewai_tools/tools/stagehand_tool/stagehand_tool.py +++ b/packages/tools/src/crewai_tools/tools/stagehand_tool/stagehand_tool.py @@ -2,7 +2,7 @@ import asyncio import json import os import re -from typing import Any, Dict, List, Optional, Type, Union +from typing import Any from pydantic import BaseModel, Field @@ -52,10 +52,10 @@ class StagehandResult(BaseModel): success: bool = Field( ..., description="Whether the operation completed successfully" ) - data: Union[str, Dict, List] = Field( + data: str | dict | list = Field( ..., description="The result data from the operation" ) - error: Optional[str] = Field( + error: str | None = Field( None, description="Optional error message if the operation failed" ) @@ -63,15 +63,15 @@ class StagehandResult(BaseModel): class StagehandToolSchema(BaseModel): """Input for StagehandTool.""" - instruction: Optional[str] = Field( + instruction: str | None = Field( None, description="Single atomic action with location context. For reliability on complex pages, use ONE specific action with location hints. 
Good examples: 'Click the search input field in the header', 'Type Italy in the focused field', 'Press Enter', 'Click the first link in the results area'. Avoid combining multiple actions. For 'navigate' command type, this can be omitted if only URL is provided.", ) - url: Optional[str] = Field( + url: str | None = Field( None, description="The URL to navigate to before executing the instruction. MUST be used with 'navigate' command. ", ) - command_type: Optional[str] = Field( + command_type: str | None = Field( "act", description="""The type of command to execute (choose one): - 'act': Perform an action like clicking buttons, filling forms, etc. (default) @@ -133,14 +133,14 @@ class StagehandTool(BaseTool): - 'extract': For getting data from a specific page section - 'observe': For finding elements in a specific area """ - args_schema: Type[BaseModel] = StagehandToolSchema + args_schema: type[BaseModel] = StagehandToolSchema # Stagehand configuration - api_key: Optional[str] = None - project_id: Optional[str] = None - model_api_key: Optional[str] = None - model_name: Optional[AvailableModel] = AvailableModel.CLAUDE_3_7_SONNET_LATEST - server_url: Optional[str] = "https://api.stagehand.browserbase.com/v1" + api_key: str | None = None + project_id: str | None = None + model_api_key: str | None = None + model_name: AvailableModel | None = AvailableModel.CLAUDE_3_7_SONNET_LATEST + server_url: str | None = "https://api.stagehand.browserbase.com/v1" headless: bool = False dom_settle_timeout_ms: int = 3000 self_heal: bool = True @@ -152,24 +152,24 @@ class StagehandTool(BaseTool): use_simplified_dom: bool = True # Instance variables - _stagehand: Optional[Stagehand] = None - _page: Optional[StagehandPage] = None - _session_id: Optional[str] = None + _stagehand: Stagehand | None = None + _page: StagehandPage | None = None + _session_id: str | None = None _testing: bool = False def __init__( self, - api_key: Optional[str] = None, - project_id: Optional[str] = None, - model_api_key: Optional[str] = None, - model_name: Optional[str] = None, - server_url: Optional[str] = None, - session_id: Optional[str] = None, - headless: Optional[bool] = None, - dom_settle_timeout_ms: Optional[int] = None, - self_heal: Optional[bool] = None, - wait_for_captcha_solves: Optional[bool] = None, - verbose: Optional[int] = None, + api_key: str | None = None, + project_id: str | None = None, + model_api_key: str | None = None, + model_name: str | None = None, + server_url: str | None = None, + session_id: str | None = None, + headless: bool | None = None, + dom_settle_timeout_ms: int | None = None, + self_heal: bool | None = None, + wait_for_captcha_solves: bool | None = None, + verbose: int | None = None, _testing: bool = False, **kwargs, ): @@ -241,19 +241,18 @@ class StagehandTool(BaseTool): model_str = str(self.model_name) if "gpt" in model_str.lower(): return self.model_api_key or os.getenv("OPENAI_API_KEY") - elif "claude" in model_str.lower() or "anthropic" in model_str.lower(): + if "claude" in model_str.lower() or "anthropic" in model_str.lower(): return self.model_api_key or os.getenv("ANTHROPIC_API_KEY") - elif "gemini" in model_str.lower(): + if "gemini" in model_str.lower(): return self.model_api_key or os.getenv("GOOGLE_API_KEY") - else: - # Default to trying OpenAI, then Anthropic - return ( - self.model_api_key - or os.getenv("OPENAI_API_KEY") - or os.getenv("ANTHROPIC_API_KEY") - ) + # Default to trying OpenAI, then Anthropic + return ( + self.model_api_key + or os.getenv("OPENAI_API_KEY") + or 
os.getenv("ANTHROPIC_API_KEY") + ) - async def _setup_stagehand(self, session_id: Optional[str] = None): + async def _setup_stagehand(self, session_id: str | None = None): """Initialize Stagehand if not already set up.""" # If we're in testing mode, return mock objects @@ -342,7 +341,7 @@ class StagehandTool(BaseTool): return self._stagehand, self._page - def _extract_steps(self, instruction: str) -> List[str]: + def _extract_steps(self, instruction: str) -> list[str]: """Extract individual steps from multi-step instructions""" # Check for numbered steps (Step 1:, Step 2:, etc.) if re.search(r"Step \d+:", instruction, re.IGNORECASE): @@ -353,10 +352,9 @@ class StagehandTool(BaseTool): ) return [step.strip() for step in steps if step.strip()] # Check for semicolon-separated instructions - elif ";" in instruction: + if ";" in instruction: return [step.strip() for step in instruction.split(";") if step.strip()] - else: - return [instruction] + return [instruction] def _simplify_instruction(self, instruction: str) -> str: """Simplify complex instructions to basic actions""" @@ -367,27 +365,24 @@ class StagehandTool(BaseTool): # For search tasks, focus on the search action first if "type" in instruction_lower or "enter" in instruction_lower: return "click on the search input field" - else: - return "search for content on the page" - elif "click" in instruction_lower: + return "search for content on the page" + if "click" in instruction_lower: # Extract what to click if "button" in instruction_lower: return "click the button" - elif "link" in instruction_lower: + if "link" in instruction_lower: return "click the link" - elif "search" in instruction_lower: + if "search" in instruction_lower: return "click the search field" - else: - return "click on the element" - elif "type" in instruction_lower or "enter" in instruction_lower: + return "click on the element" + if "type" in instruction_lower or "enter" in instruction_lower: return "type in the input field" - else: - return instruction # Return as-is if can't simplify + return instruction # Return as-is if can't simplify async def _async_run( self, - instruction: Optional[str] = None, - url: Optional[str] = None, + instruction: str | None = None, + url: str | None = None, command_type: str = "act", ): """Override _async_run with improved atomic action handling""" @@ -490,12 +485,11 @@ class StagehandTool(BaseTool): False, results[0], results[0]["error"] ) return self._format_result(True, results[0]) - else: - # Multiple steps, return all results - has_errors = any("error" in result for result in results) - return self._format_result(not has_errors, {"steps": results}) + # Multiple steps, return all results + has_errors = any("error" in result for result in results) + return self._format_result(not has_errors, {"steps": results}) - elif command_type.lower() == "navigate": + if command_type.lower() == "navigate": # For navigation, use the goto method directly if not url: error_msg = "No URL provided for navigation. Please provide a URL." 
@@ -512,7 +506,7 @@ class StagehandTool(BaseTool): }, ) - elif command_type.lower() == "extract": + if command_type.lower() == "extract": # Create extract options with API key from stagehand.schemas import ExtractOptions @@ -528,7 +522,7 @@ class StagehandTool(BaseTool): self._logger.info(f"Extract operation completed successfully {result}") return self._format_result(True, result.model_dump()) - elif command_type.lower() == "observe": + if command_type.lower() == "observe": # Create observe options with API key from stagehand.schemas import ObserveOptions @@ -558,13 +552,12 @@ class StagehandTool(BaseTool): ) return self._format_result(True, formatted_results) - else: - error_msg = f"Unknown command type: {command_type}" - self._logger.error(error_msg) - return self._format_result(False, {}, error_msg) + error_msg = f"Unknown command type: {command_type}" + self._logger.error(error_msg) + return self._format_result(False, {}, error_msg) except Exception as e: - error_msg = f"Error using Stagehand: {str(e)}" + error_msg = f"Error using Stagehand: {e!s}" self._logger.error(f"Operation failed: {error_msg}") return self._format_result(False, {}, error_msg) @@ -574,8 +567,8 @@ class StagehandTool(BaseTool): def _run( self, - instruction: Optional[str] = None, - url: Optional[str] = None, + instruction: str | None = None, + url: str | None = None, command_type: str = "act", ) -> str: """ @@ -633,13 +626,10 @@ class StagehandTool(BaseTool): f"Step {i + 1}: {step.get('message', 'Completed')}" ) return "\n".join(step_messages) - else: - return ( - f"Action result: {result.data.get('message', 'Completed')}" - ) - elif command_type.lower() == "extract": + return f"Action result: {result.data.get('message', 'Completed')}" + if command_type.lower() == "extract": return f"Extracted data: {json.dumps(result.data, indent=2)}" - elif command_type.lower() == "observe": + if command_type.lower() == "observe": formatted_results = [] for element in result.data: formatted_results.append( @@ -650,10 +640,8 @@ class StagehandTool(BaseTool): f"Suggested action: {element['method']}" ) return "\n".join(formatted_results) - else: - return json.dumps(result.data, indent=2) - else: - return f"Error: {result.error}" + return json.dumps(result.data, indent=2) + return f"Error: {result.error}" except RuntimeError: # No event loop exists, create one @@ -662,10 +650,8 @@ class StagehandTool(BaseTool): if result.success: if isinstance(result.data, dict): return json.dumps(result.data, indent=2) - else: - return str(result.data) - else: - return f"Error: {result.error}" + return str(result.data) + return f"Error: {result.error}" async def _async_close(self): """Asynchronously clean up Stagehand resources.""" @@ -715,7 +701,7 @@ class StagehandTool(BaseTool): self._stagehand.close() except Exception as e: # Log but don't raise - we're cleaning up - print(f"Error closing Stagehand: {str(e)}") + print(f"Error closing Stagehand: {e!s}") self._stagehand = None @@ -729,4 +715,3 @@ class StagehandTool(BaseTool): def __exit__(self, exc_type, exc_val, exc_tb): """Exit the context manager and clean up resources.""" self.close() - diff --git a/packages/tools/src/crewai_tools/tools/tavily_extractor_tool/tavily_extractor_tool.py b/packages/tools/src/crewai_tools/tools/tavily_extractor_tool/tavily_extractor_tool.py index 5e8a760ee..1ddb0ee70 100644 --- a/packages/tools/src/crewai_tools/tools/tavily_extractor_tool/tavily_extractor_tool.py +++ b/packages/tools/src/crewai_tools/tools/tavily_extractor_tool/tavily_extractor_tool.py @@ -1,13 
+1,14 @@ -from crewai.tools import BaseTool, EnvVar -from pydantic import BaseModel, Field -from typing import Optional, Type, Any, Union, List, Literal -from dotenv import load_dotenv -import os import json +import os +from typing import Any, Literal + +from crewai.tools import BaseTool, EnvVar +from dotenv import load_dotenv +from pydantic import BaseModel, Field load_dotenv() try: - from tavily import TavilyClient, AsyncTavilyClient + from tavily import AsyncTavilyClient, TavilyClient TAVILY_AVAILABLE = True except ImportError: @@ -19,16 +20,20 @@ except ImportError: class TavilyExtractorToolSchema(BaseModel): """Input schema for TavilyExtractorTool.""" - urls: Union[List[str], str] = Field( + urls: list[str] | str = Field( ..., description="The URL(s) to extract data from. Can be a single URL or a list of URLs.", ) class TavilyExtractorTool(BaseTool): - package_dependencies: List[str] = ["tavily-python"] - env_vars: List[EnvVar] = [ - EnvVar(name="TAVILY_API_KEY", description="API key for Tavily extraction service", required=True), + package_dependencies: list[str] = ["tavily-python"] + env_vars: list[EnvVar] = [ + EnvVar( + name="TAVILY_API_KEY", + description="API key for Tavily extraction service", + required=True, + ), ] """ Tool that uses the Tavily API to extract content from web pages. @@ -47,16 +52,16 @@ class TavilyExtractorTool(BaseTool): """ model_config = {"arbitrary_types_allowed": True} - client: Optional[TavilyClient] = None - async_client: Optional[AsyncTavilyClient] = None + client: TavilyClient | None = None + async_client: AsyncTavilyClient | None = None name: str = "TavilyExtractorTool" description: str = "Extracts content from one or more web pages using the Tavily API. Returns structured data." - args_schema: Type[BaseModel] = TavilyExtractorToolSchema - api_key: Optional[str] = Field( + args_schema: type[BaseModel] = TavilyExtractorToolSchema + api_key: str | None = Field( default_factory=lambda: os.getenv("TAVILY_API_KEY"), description="The Tavily API key. If not provided, it will be loaded from the environment variable TAVILY_API_KEY.", ) - proxies: Optional[dict[str, str]] = Field( + proxies: dict[str, str] | None = Field( default=None, description="Optional proxies to use for the Tavily API requests.", ) @@ -88,8 +93,9 @@ class TavilyExtractorTool(BaseTool): ) else: try: - import click import subprocess + + import click except ImportError: raise ImportError( "The 'tavily-python' package is required. 'click' and 'subprocess' are also needed to assist with installation if the package is missing. " @@ -117,7 +123,7 @@ class TavilyExtractorTool(BaseTool): def _run( self, - urls: Union[List[str], str], + urls: list[str] | str, ) -> str: """ Synchronously extracts content from the given URL(s). @@ -145,7 +151,7 @@ class TavilyExtractorTool(BaseTool): async def _arun( self, - urls: Union[List[str], str], + urls: list[str] | str, ) -> str: """ Asynchronously extracts content from the given URL(s). 
diff --git a/packages/tools/src/crewai_tools/tools/tavily_search_tool/tavily_search_tool.py b/packages/tools/src/crewai_tools/tools/tavily_search_tool/tavily_search_tool.py index 2f9d6dcca..8b1bfb762 100644 --- a/packages/tools/src/crewai_tools/tools/tavily_search_tool/tavily_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/tavily_search_tool/tavily_search_tool.py @@ -1,13 +1,15 @@ -from crewai.tools import BaseTool, EnvVar -from pydantic import BaseModel, Field -from typing import Optional, Type, Any, Union, Literal, Sequence, List -from dotenv import load_dotenv -import os import json +import os +from collections.abc import Sequence +from typing import Any, Literal + +from crewai.tools import BaseTool, EnvVar +from dotenv import load_dotenv +from pydantic import BaseModel, Field load_dotenv() try: - from tavily import TavilyClient, AsyncTavilyClient + from tavily import AsyncTavilyClient, TavilyClient TAVILY_AVAILABLE = True except ImportError: @@ -49,19 +51,19 @@ class TavilySearchTool(BaseTool): """ model_config = {"arbitrary_types_allowed": True} - client: Optional[TavilyClient] = None - async_client: Optional[AsyncTavilyClient] = None + client: TavilyClient | None = None + async_client: AsyncTavilyClient | None = None name: str = "Tavily Search" description: str = ( "A tool that performs web searches using the Tavily Search API. " "It returns a JSON object containing the search results." ) - args_schema: Type[BaseModel] = TavilySearchToolSchema - api_key: Optional[str] = Field( + args_schema: type[BaseModel] = TavilySearchToolSchema + api_key: str | None = Field( default_factory=lambda: os.getenv("TAVILY_API_KEY"), description="The Tavily API key. If not provided, it will be loaded from the environment variable TAVILY_API_KEY.", ) - proxies: Optional[dict[str, str]] = Field( + proxies: dict[str, str] | None = Field( default=None, description="Optional proxies to use for the Tavily API requests.", ) @@ -71,20 +73,20 @@ class TavilySearchTool(BaseTool): topic: Literal["general", "news", "finance"] = Field( default="general", description="The topic to focus the search on." ) - time_range: Optional[Literal["day", "week", "month", "year"]] = Field( + time_range: Literal["day", "week", "month", "year"] | None = Field( default=None, description="The time range for the search." ) days: int = Field(default=7, description="The number of days to search back.") max_results: int = Field( default=5, description="The maximum number of results to return." ) - include_domains: Optional[Sequence[str]] = Field( + include_domains: Sequence[str] | None = Field( default=None, description="A list of domains to include in the search." ) - exclude_domains: Optional[Sequence[str]] = Field( + exclude_domains: Sequence[str] | None = Field( default=None, description="A list of domains to exclude from the search." ) - include_answer: Union[bool, Literal["basic", "advanced"]] = Field( + include_answer: bool | Literal["basic", "advanced"] = Field( default=False, description="Whether to include a direct answer to the query." 
) include_raw_content: bool = Field( @@ -101,9 +103,13 @@ class TavilySearchTool(BaseTool): default=1000, description="Maximum length for the 'content' of each search result to avoid context window issues.", ) - package_dependencies: List[str] = ["tavily-python"] - env_vars: List[EnvVar] = [ - EnvVar(name="TAVILY_API_KEY", description="API key for Tavily search service", required=True), + package_dependencies: list[str] = ["tavily-python"] + env_vars: list[EnvVar] = [ + EnvVar( + name="TAVILY_API_KEY", + description="API key for Tavily search service", + required=True, + ), ] def __init__(self, **kwargs: Any): @@ -115,8 +121,9 @@ class TavilySearchTool(BaseTool): ) else: try: - import click import subprocess + + import click except ImportError: raise ImportError( "The 'tavily-python' package is required. 'click' and 'subprocess' are also needed to assist with installation if the package is missing. " diff --git a/packages/tools/src/crewai_tools/tools/txt_search_tool/txt_search_tool.py b/packages/tools/src/crewai_tools/tools/txt_search_tool/txt_search_tool.py index 93d696ab1..7b45875cf 100644 --- a/packages/tools/src/crewai_tools/tools/txt_search_tool/txt_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/txt_search_tool/txt_search_tool.py @@ -1,5 +1,3 @@ -from typing import Optional, Type - from pydantic import BaseModel, Field from ..rag.rag_tool import RagTool @@ -25,9 +23,9 @@ class TXTSearchTool(RagTool): description: str = ( "A tool that can be used to semantic search a query from a txt's content." ) - args_schema: Type[BaseModel] = TXTSearchToolSchema + args_schema: type[BaseModel] = TXTSearchToolSchema - def __init__(self, txt: Optional[str] = None, **kwargs): + def __init__(self, txt: str | None = None, **kwargs): super().__init__(**kwargs) if txt is not None: self.add(txt) @@ -38,8 +36,12 @@ class TXTSearchTool(RagTool): def _run( self, search_query: str, - txt: Optional[str] = None, + txt: str | None = None, + similarity_threshold: float | None = None, + limit: int | None = None, ) -> str: if txt is not None: self.add(txt) - return super()._run(query=search_query) + return super()._run( + query=search_query, similarity_threshold=similarity_threshold, limit=limit + ) diff --git a/packages/tools/src/crewai_tools/tools/vision_tool/vision_tool.py b/packages/tools/src/crewai_tools/tools/vision_tool/vision_tool.py index 6df658898..0c57ed2e6 100644 --- a/packages/tools/src/crewai_tools/tools/vision_tool/vision_tool.py +++ b/packages/tools/src/crewai_tools/tools/vision_tool/vision_tool.py @@ -1,6 +1,5 @@ import base64 from pathlib import Path -from typing import List, Optional, Type from crewai import LLM from crewai.tools import BaseTool, EnvVar @@ -13,7 +12,7 @@ class ImagePromptSchema(BaseModel): image_path_url: str = "The image path or URL." @field_validator("image_path_url") - def validate_image_path_url(cls, v: str) -> str: + def validate_image_path_url(self, v: str) -> str: if v.startswith("http"): return v @@ -43,15 +42,19 @@ class VisionTool(BaseTool): description: str = ( "This tool uses OpenAI's Vision API to describe the contents of an image." 
) - args_schema: Type[BaseModel] = ImagePromptSchema - env_vars: List[EnvVar] = [ - EnvVar(name="OPENAI_API_KEY", description="API key for OpenAI services", required=True), + args_schema: type[BaseModel] = ImagePromptSchema + env_vars: list[EnvVar] = [ + EnvVar( + name="OPENAI_API_KEY", + description="API key for OpenAI services", + required=True, + ), ] _model: str = PrivateAttr(default="gpt-4o-mini") - _llm: Optional[LLM] = PrivateAttr(default=None) + _llm: LLM | None = PrivateAttr(default=None) - def __init__(self, llm: Optional[LLM] = None, model: str = "gpt-4o-mini", **kwargs): + def __init__(self, llm: LLM | None = None, model: str = "gpt-4o-mini", **kwargs): """Initialize the vision tool. Args: @@ -97,9 +100,9 @@ class VisionTool(BaseTool): base64_image = self._encode_image(image_path_url) image_data = f"data:image/jpeg;base64,{base64_image}" except Exception as e: - return f"Error processing image: {str(e)}" + return f"Error processing image: {e!s}" - response = self.llm.call( + return self.llm.call( messages=[ { "role": "user", @@ -113,9 +116,8 @@ class VisionTool(BaseTool): }, ], ) - return response except Exception as e: - return f"An error occurred: {str(e)}" + return f"An error occurred: {e!s}" def _encode_image(self, image_path: str) -> str: """Encode an image file as base64. diff --git a/packages/tools/src/crewai_tools/tools/weaviate_tool/vector_search.py b/packages/tools/src/crewai_tools/tools/weaviate_tool/vector_search.py index c75dd03da..fc5b73c64 100644 --- a/packages/tools/src/crewai_tools/tools/weaviate_tool/vector_search.py +++ b/packages/tools/src/crewai_tools/tools/weaviate_tool/vector_search.py @@ -1,6 +1,6 @@ import json import os -from typing import Any, Optional, Type, List +from typing import Any try: import weaviate @@ -31,19 +31,23 @@ class WeaviateToolSchema(BaseModel): class WeaviateVectorSearchTool(BaseTool): """Tool to search the Weaviate database""" - package_dependencies: List[str] = ["weaviate-client"] + package_dependencies: list[str] = ["weaviate-client"] name: str = "WeaviateVectorSearchTool" description: str = "A tool to search the Weaviate database for relevant information on internal documents." 
- args_schema: Type[BaseModel] = WeaviateToolSchema - query: Optional[str] = None - vectorizer: Optional[Vectorizers] = None - generative_model: Optional[str] = None - collection_name: Optional[str] = None - limit: Optional[int] = Field(default=3) - headers: Optional[dict] = None - alpha: Optional[int] = Field(default=0.75) - env_vars: List[EnvVar] = [ - EnvVar(name="OPENAI_API_KEY", description="OpenAI API key for embedding generation and retrieval", required=True), + args_schema: type[BaseModel] = WeaviateToolSchema + query: str | None = None + vectorizer: Vectorizers | None = None + generative_model: str | None = None + collection_name: str | None = None + limit: int | None = Field(default=3) + headers: dict | None = None + alpha: int | None = Field(default=0.75) + env_vars: list[EnvVar] = [ + EnvVar( + name="OPENAI_API_KEY", + description="OpenAI API key for embedding generation and retrieval", + required=True, + ), ] weaviate_cluster_url: str = Field( ..., @@ -53,7 +57,6 @@ class WeaviateVectorSearchTool(BaseTool): ..., description="The API key for the Weaviate cluster", ) - package_dependencies: List[str] = ["weaviate-client"] def __init__(self, **kwargs): super().__init__(**kwargs) @@ -112,9 +115,7 @@ class WeaviateVectorSearchTool(BaseTool): ) response = internal_docs.query.hybrid( - query=query, - limit=self.limit, - alpha=self.alpha + query=query, limit=self.limit, alpha=self.alpha ) json_response = "" for obj in response.objects: diff --git a/packages/tools/src/crewai_tools/tools/website_search/website_search_tool.py b/packages/tools/src/crewai_tools/tools/website_search/website_search_tool.py index 9728b44db..ab7ffd24c 100644 --- a/packages/tools/src/crewai_tools/tools/website_search/website_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/website_search/website_search_tool.py @@ -1,11 +1,4 @@ -from typing import Any, Optional, Type - -try: - from embedchain.models.data_type import DataType - EMBEDCHAIN_AVAILABLE = True -except ImportError: - EMBEDCHAIN_AVAILABLE = False - +from crewai_tools.rag.data_types import DataType from pydantic import BaseModel, Field from ..rag.rag_tool import RagTool @@ -30,12 +23,10 @@ class WebsiteSearchToolSchema(FixedWebsiteSearchToolSchema): class WebsiteSearchTool(RagTool): name: str = "Search in a specific website" - description: str = ( - "A tool that can be used to semantic search a query from a specific URL content." - ) - args_schema: Type[BaseModel] = WebsiteSearchToolSchema + description: str = "A tool that can be used to semantic search a query from a specific URL content." + args_schema: type[BaseModel] = WebsiteSearchToolSchema - def __init__(self, website: Optional[str] = None, **kwargs): + def __init__(self, website: str | None = None, **kwargs): super().__init__(**kwargs) if website is not None: self.add(website) @@ -44,15 +35,17 @@ class WebsiteSearchTool(RagTool): self._generate_description() def add(self, website: str) -> None: - if not EMBEDCHAIN_AVAILABLE: - raise ImportError("embedchain is not installed. 
Please install it with `pip install crewai-tools[embedchain]`") - super().add(website, data_type=DataType.WEB_PAGE) + super().add(website, data_type=DataType.WEBSITE) def _run( self, search_query: str, - website: Optional[str] = None, + website: str | None = None, + similarity_threshold: float | None = None, + limit: int | None = None, ) -> str: if website is not None: self.add(website) - return super()._run(query=search_query) + return super()._run( + query=search_query, similarity_threshold=similarity_threshold, limit=limit + ) diff --git a/packages/tools/src/crewai_tools/tools/xml_search_tool/xml_search_tool.py b/packages/tools/src/crewai_tools/tools/xml_search_tool/xml_search_tool.py index 426b0ca38..0842ca1b9 100644 --- a/packages/tools/src/crewai_tools/tools/xml_search_tool/xml_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/xml_search_tool/xml_search_tool.py @@ -1,5 +1,3 @@ -from typing import Optional, Type - from pydantic import BaseModel, Field from ..rag.rag_tool import RagTool @@ -25,9 +23,9 @@ class XMLSearchTool(RagTool): description: str = ( "A tool that can be used to semantic search a query from a XML's content." ) - args_schema: Type[BaseModel] = XMLSearchToolSchema + args_schema: type[BaseModel] = XMLSearchToolSchema - def __init__(self, xml: Optional[str] = None, **kwargs): + def __init__(self, xml: str | None = None, **kwargs): super().__init__(**kwargs) if xml is not None: self.add(xml) @@ -38,8 +36,12 @@ class XMLSearchTool(RagTool): def _run( self, search_query: str, - xml: Optional[str] = None, + xml: str | None = None, + similarity_threshold: float | None = None, + limit: int | None = None, ) -> str: if xml is not None: self.add(xml) - return super()._run(query=search_query) + return super()._run( + query=search_query, similarity_threshold=similarity_threshold, limit=limit + ) diff --git a/packages/tools/src/crewai_tools/tools/youtube_channel_search_tool/youtube_channel_search_tool.py b/packages/tools/src/crewai_tools/tools/youtube_channel_search_tool/youtube_channel_search_tool.py index 6d16a708d..d35b19377 100644 --- a/packages/tools/src/crewai_tools/tools/youtube_channel_search_tool/youtube_channel_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/youtube_channel_search_tool/youtube_channel_search_tool.py @@ -1,11 +1,4 @@ -from typing import Any, Optional, Type - -try: - from embedchain.models.data_type import DataType - EMBEDCHAIN_AVAILABLE = True -except ImportError: - EMBEDCHAIN_AVAILABLE = False - +from crewai_tools.rag.data_types import DataType from pydantic import BaseModel, Field from ..rag.rag_tool import RagTool @@ -30,12 +23,10 @@ class YoutubeChannelSearchToolSchema(FixedYoutubeChannelSearchToolSchema): class YoutubeChannelSearchTool(RagTool): name: str = "Search a Youtube Channels content" - description: str = ( - "A tool that can be used to semantic search a query from a Youtube Channels content." - ) - args_schema: Type[BaseModel] = YoutubeChannelSearchToolSchema + description: str = "A tool that can be used to semantic search a query from a Youtube Channels content." 
+ args_schema: type[BaseModel] = YoutubeChannelSearchToolSchema - def __init__(self, youtube_channel_handle: Optional[str] = None, **kwargs): + def __init__(self, youtube_channel_handle: str | None = None, **kwargs): super().__init__(**kwargs) if youtube_channel_handle is not None: self.add(youtube_channel_handle) @@ -54,8 +45,12 @@ class YoutubeChannelSearchTool(RagTool): def _run( self, search_query: str, - youtube_channel_handle: Optional[str] = None, + youtube_channel_handle: str | None = None, + similarity_threshold: float | None = None, + limit: int | None = None, ) -> str: if youtube_channel_handle is not None: self.add(youtube_channel_handle) - return super()._run(query=search_query) + return super()._run( + query=search_query, similarity_threshold=similarity_threshold, limit=limit + ) diff --git a/packages/tools/src/crewai_tools/tools/youtube_video_search_tool/youtube_video_search_tool.py b/packages/tools/src/crewai_tools/tools/youtube_video_search_tool/youtube_video_search_tool.py index b93cc6c29..0cd810113 100644 --- a/packages/tools/src/crewai_tools/tools/youtube_video_search_tool/youtube_video_search_tool.py +++ b/packages/tools/src/crewai_tools/tools/youtube_video_search_tool/youtube_video_search_tool.py @@ -1,11 +1,4 @@ -from typing import Any, Optional, Type - -try: - from embedchain.models.data_type import DataType - EMBEDCHAIN_AVAILABLE = True -except ImportError: - EMBEDCHAIN_AVAILABLE = False - +from crewai_tools.rag.data_types import DataType from pydantic import BaseModel, Field from ..rag.rag_tool import RagTool @@ -30,12 +23,10 @@ class YoutubeVideoSearchToolSchema(FixedYoutubeVideoSearchToolSchema): class YoutubeVideoSearchTool(RagTool): name: str = "Search a Youtube Video content" - description: str = ( - "A tool that can be used to semantic search a query from a Youtube Video content." - ) - args_schema: Type[BaseModel] = YoutubeVideoSearchToolSchema + description: str = "A tool that can be used to semantic search a query from a Youtube Video content." + args_schema: type[BaseModel] = YoutubeVideoSearchToolSchema - def __init__(self, youtube_video_url: Optional[str] = None, **kwargs): + def __init__(self, youtube_video_url: str | None = None, **kwargs): super().__init__(**kwargs) if youtube_video_url is not None: self.add(youtube_video_url) @@ -44,15 +35,17 @@ class YoutubeVideoSearchTool(RagTool): self._generate_description() def add(self, youtube_video_url: str) -> None: - if not EMBEDCHAIN_AVAILABLE: - raise ImportError("embedchain is not installed. 
Please install it with `pip install crewai-tools[embedchain]`") super().add(youtube_video_url, data_type=DataType.YOUTUBE_VIDEO) def _run( self, search_query: str, - youtube_video_url: Optional[str] = None, + youtube_video_url: str | None = None, + similarity_threshold: float | None = None, + limit: int | None = None, ) -> str: if youtube_video_url is not None: self.add(youtube_video_url) - return super()._run(query=search_query) + return super()._run( + query=search_query, similarity_threshold=similarity_threshold, limit=limit + ) diff --git a/packages/tools/src/crewai_tools/tools/zapier_action_tool/zapier_action_tool.py b/packages/tools/src/crewai_tools/tools/zapier_action_tool/zapier_action_tool.py index 190ef3fc3..1c7ab57f8 100644 --- a/packages/tools/src/crewai_tools/tools/zapier_action_tool/zapier_action_tool.py +++ b/packages/tools/src/crewai_tools/tools/zapier_action_tool/zapier_action_tool.py @@ -1,15 +1,15 @@ -import os import logging -from typing import List, Optional +import os + from crewai.tools import BaseTool from crewai_tools.adapters.zapier_adapter import ZapierActionsAdapter logger = logging.getLogger(__name__) -def ZapierActionTools( - zapier_api_key: Optional[str] = None, action_list: Optional[List[str]] = None -) -> List[BaseTool]: +def ZapierActionTools( # noqa: N802 + zapier_api_key: str | None = None, action_list: list[str] | None = None +) -> list[BaseTool]: """Factory function that returns Zapier action tools. Args: diff --git a/packages/tools/tests/tools/rag/rag_tool_test.py b/packages/tools/tests/tools/rag/rag_tool_test.py index 42baccc2c..693cd120a 100644 --- a/packages/tools/tests/tools/rag/rag_tool_test.py +++ b/packages/tools/tests/tools/rag/rag_tool_test.py @@ -1,43 +1,54 @@ -import os -from tempfile import NamedTemporaryFile +from tempfile import TemporaryDirectory from typing import cast -from unittest import mock +from pathlib import Path -from pytest import fixture -from crewai_tools.adapters.embedchain_adapter import EmbedchainAdapter +from crewai_tools.adapters.crewai_rag_adapter import CrewAIRagAdapter from crewai_tools.tools.rag.rag_tool import RagTool -@fixture(autouse=True) -def mock_embedchain_db_uri(): - with NamedTemporaryFile() as tmp: - uri = f"sqlite:///{tmp.name}" - with mock.patch.dict(os.environ, {"EMBEDCHAIN_DB_URI": uri}): - yield - - -def test_custom_llm_and_embedder(): +def test_rag_tool_initialization(): + """Test that RagTool initializes with CrewAI adapter by default.""" class MyTool(RagTool): pass - tool = MyTool( - config=dict( - llm=dict( - provider="openai", - config=dict(model="gpt-3.5-custom"), - ), - embedder=dict( - provider="openai", - config=dict(model="text-embedding-3-custom"), - ), - ) - ) + tool = MyTool() assert tool.adapter is not None - assert isinstance(tool.adapter, EmbedchainAdapter) + assert isinstance(tool.adapter, CrewAIRagAdapter) + + adapter = cast(CrewAIRagAdapter, tool.adapter) + assert adapter.collection_name == "rag_tool_collection" + assert adapter._client is not None - adapter = cast(EmbedchainAdapter, tool.adapter) - assert adapter.embedchain_app.llm.config.model == "gpt-3.5-custom" - assert ( - adapter.embedchain_app.embedding_model.config.model == "text-embedding-3-custom" - ) + +def test_rag_tool_add_and_query(): + """Test adding content and querying with RagTool.""" + class MyTool(RagTool): + pass + + tool = MyTool() + + tool.add("The sky is blue on a clear day.") + tool.add("Machine learning is a subset of artificial intelligence.") + + result = tool._run(query="What color is the sky?") + 
assert "Relevant Content:" in result + + result = tool._run(query="Tell me about machine learning") + assert "Relevant Content:" in result + + +def test_rag_tool_with_file(): + """Test RagTool with file content.""" + with TemporaryDirectory() as tmpdir: + test_file = Path(tmpdir) / "test.txt" + test_file.write_text("Python is a programming language known for its simplicity.") + + class MyTool(RagTool): + pass + + tool = MyTool() + tool.add(str(test_file)) + + result = tool._run(query="What is Python?") + assert "Relevant Content:" in result diff --git a/packages/tools/tests/tools/test_search_tools.py b/packages/tools/tests/tools/test_search_tools.py index eaa0c591c..b912ef005 100644 --- a/packages/tools/tests/tools/test_search_tools.py +++ b/packages/tools/tests/tools/test_search_tools.py @@ -1,11 +1,11 @@ import os import tempfile from pathlib import Path -from unittest.mock import ANY, MagicMock +from unittest.mock import MagicMock import pytest -from embedchain.models.data_type import DataType +from crewai_tools.rag.data_types import DataType from crewai_tools.tools import ( CodeDocsSearchTool, CSVSearchTool, @@ -49,7 +49,7 @@ def test_pdf_search_tool(mock_adapter): result = tool._run(query="test content") assert "this is a test" in result.lower() mock_adapter.add.assert_called_once_with("test.pdf", data_type=DataType.PDF_FILE) - mock_adapter.query.assert_called_once_with("test content") + mock_adapter.query.assert_called_once_with("test content", similarity_threshold=0.6, limit=5) mock_adapter.query.reset_mock() mock_adapter.add.reset_mock() @@ -58,7 +58,7 @@ def test_pdf_search_tool(mock_adapter): result = tool._run(pdf="test.pdf", query="test content") assert "this is a test" in result.lower() mock_adapter.add.assert_called_once_with("test.pdf", data_type=DataType.PDF_FILE) - mock_adapter.query.assert_called_once_with("test content") + mock_adapter.query.assert_called_once_with("test content", similarity_threshold=0.6, limit=5) def test_txt_search_tool(): @@ -82,7 +82,7 @@ def test_docx_search_tool(mock_adapter): result = tool._run(search_query="test content") assert "this is a test" in result.lower() mock_adapter.add.assert_called_once_with("test.docx", data_type=DataType.DOCX) - mock_adapter.query.assert_called_once_with("test content") + mock_adapter.query.assert_called_once_with("test content", similarity_threshold=0.6, limit=5) mock_adapter.query.reset_mock() mock_adapter.add.reset_mock() @@ -91,7 +91,7 @@ def test_docx_search_tool(mock_adapter): result = tool._run(docx="test.docx", search_query="test content") assert "this is a test" in result.lower() mock_adapter.add.assert_called_once_with("test.docx", data_type=DataType.DOCX) - mock_adapter.query.assert_called_once_with("test content") + mock_adapter.query.assert_called_once_with("test content", similarity_threshold=0.6, limit=5) def test_json_search_tool(): @@ -114,7 +114,7 @@ def test_xml_search_tool(mock_adapter): result = tool._run(search_query="test XML", xml="test.xml") assert "this is a test" in result.lower() mock_adapter.add.assert_called_once_with("test.xml") - mock_adapter.query.assert_called_once_with("test XML") + mock_adapter.query.assert_called_once_with("test XML", similarity_threshold=0.6, limit=5) def test_csv_search_tool(): @@ -153,8 +153,8 @@ def test_website_search_tool(mock_adapter): tool = WebsiteSearchTool(website=website, adapter=mock_adapter) result = tool._run(search_query=search_query) - mock_adapter.query.assert_called_once_with("what is crewai?") - 
-    mock_adapter.add.assert_called_once_with(website, data_type=DataType.WEB_PAGE)
+    mock_adapter.query.assert_called_once_with("what is crewai?", similarity_threshold=0.6, limit=5)
+    mock_adapter.add.assert_called_once_with(website, data_type=DataType.WEBSITE)
 
     assert "this is a test" in result.lower()
 
@@ -164,8 +164,8 @@ def test_website_search_tool(mock_adapter):
     tool = WebsiteSearchTool(adapter=mock_adapter)
     result = tool._run(website=website, search_query=search_query)
 
-    mock_adapter.query.assert_called_once_with("what is crewai?")
-    mock_adapter.add.assert_called_once_with(website, data_type=DataType.WEB_PAGE)
+    mock_adapter.query.assert_called_once_with("what is crewai?", similarity_threshold=0.6, limit=5)
+    mock_adapter.add.assert_called_once_with(website, data_type=DataType.WEBSITE)
 
     assert "this is a test" in result.lower()
 
@@ -185,7 +185,7 @@ def test_youtube_video_search_tool(mock_adapter):
     mock_adapter.add.assert_called_once_with(
         youtube_video_url, data_type=DataType.YOUTUBE_VIDEO
     )
-    mock_adapter.query.assert_called_once_with(search_query)
+    mock_adapter.query.assert_called_once_with(search_query, similarity_threshold=0.6, limit=5)
 
     mock_adapter.query.reset_mock()
     mock_adapter.add.reset_mock()
@@ -197,7 +197,7 @@
     mock_adapter.add.assert_called_once_with(
         youtube_video_url, data_type=DataType.YOUTUBE_VIDEO
     )
-    mock_adapter.query.assert_called_once_with(search_query)
+    mock_adapter.query.assert_called_once_with(search_query, similarity_threshold=0.6, limit=5)
 
 
 def test_youtube_channel_search_tool(mock_adapter):
@@ -213,7 +213,7 @@
     mock_adapter.add.assert_called_once_with(
         youtube_channel_handle, data_type=DataType.YOUTUBE_CHANNEL
     )
-    mock_adapter.query.assert_called_once_with(search_query)
+    mock_adapter.query.assert_called_once_with(search_query, similarity_threshold=0.6, limit=5)
 
     mock_adapter.query.reset_mock()
     mock_adapter.add.reset_mock()
@@ -227,7 +227,7 @@
     mock_adapter.add.assert_called_once_with(
         youtube_channel_handle, data_type=DataType.YOUTUBE_CHANNEL
    )
-    mock_adapter.query.assert_called_once_with(search_query)
+    mock_adapter.query.assert_called_once_with(search_query, similarity_threshold=0.6, limit=5)
 
 
 def test_code_docs_search_tool(mock_adapter):
@@ -239,7 +239,7 @@
     result = tool._run(search_query=search_query)
     assert "test documentation" in result
     mock_adapter.add.assert_called_once_with(docs_url, data_type=DataType.DOCS_SITE)
-    mock_adapter.query.assert_called_once_with(search_query)
+    mock_adapter.query.assert_called_once_with(search_query, similarity_threshold=0.6, limit=5)
 
     mock_adapter.query.reset_mock()
     mock_adapter.add.reset_mock()
@@ -248,7 +248,7 @@
     result = tool._run(docs_url=docs_url, search_query=search_query)
     assert "test documentation" in result
     mock_adapter.add.assert_called_once_with(docs_url, data_type=DataType.DOCS_SITE)
-    mock_adapter.query.assert_called_once_with(search_query)
+    mock_adapter.query.assert_called_once_with(search_query, similarity_threshold=0.6, limit=5)
 
 
 def test_github_search_tool(mock_adapter):
@@ -264,9 +264,11 @@
     result = tool._run(search_query="tell me about crewai repo")
     assert "repo description" in result
     mock_adapter.add.assert_called_once_with(
-        "repo:crewai/crewai type:code", data_type="github", loader=ANY
+        "https://github.com/crewai/crewai",
+        data_type=DataType.GITHUB,
+        metadata={"content_types": ["code"], "gh_token": "test_token"}
     )
-    mock_adapter.query.assert_called_once_with("tell me about crewai repo")
+    mock_adapter.query.assert_called_once_with("tell me about crewai repo", similarity_threshold=0.6, limit=5)
 
     # ensure content types provided by run call is used
     mock_adapter.query.reset_mock()
@@ -280,9 +282,11 @@
     )
     assert "repo description" in result
     mock_adapter.add.assert_called_once_with(
-        "repo:crewai/crewai type:code,issue", data_type="github", loader=ANY
+        "https://github.com/crewai/crewai",
+        data_type=DataType.GITHUB,
+        metadata={"content_types": ["code", "issue"], "gh_token": "test_token"}
     )
-    mock_adapter.query.assert_called_once_with("tell me about crewai repo")
+    mock_adapter.query.assert_called_once_with("tell me about crewai repo", similarity_threshold=0.6, limit=5)
 
     # ensure default content types are used if not provided
     mock_adapter.query.reset_mock()
@@ -295,9 +299,11 @@
     )
     assert "repo description" in result
     mock_adapter.add.assert_called_once_with(
-        "repo:crewai/crewai type:code,repo,pr,issue", data_type="github", loader=ANY
+        "https://github.com/crewai/crewai",
+        data_type=DataType.GITHUB,
+        metadata={"content_types": ["code", "repo", "pr", "issue"], "gh_token": "test_token"}
     )
-    mock_adapter.query.assert_called_once_with("tell me about crewai repo")
+    mock_adapter.query.assert_called_once_with("tell me about crewai repo", similarity_threshold=0.6, limit=5)
 
     # ensure nothing is added if no repo is provided
     mock_adapter.query.reset_mock()
@@ -306,4 +312,4 @@
     tool = GithubSearchTool(gh_token="test_token", adapter=mock_adapter)
     result = tool._run(search_query="tell me about crewai repo")
     mock_adapter.add.assert_not_called()
-    mock_adapter.query.assert_called_once_with("tell me about crewai repo")
+    mock_adapter.query.assert_called_once_with("tell me about crewai repo", similarity_threshold=0.6, limit=5)
diff --git a/pyproject.toml b/pyproject.toml
index d897c516d..7161fcc49 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -78,10 +78,17 @@ select = [
     "I001", # sort imports
     "I002", # remove unused imports
 ]
-ignore = ["E501"] # ignore line too long
+ignore = [
+    "E501", # ignore line too long
+    "S607", # Starting a process with a partial executable path - OK for uv/pip commands
+]
 
 [tool.ruff.lint.per-file-ignores]
 "tests/**/*.py" = ["S101"] # Allow assert statements in tests
+"packages/*/tests/**/*.py" = ["S101"] # Allow assert statements in tests
+"**/*_test.py" = ["S101"] # Allow assert statements in test files
+"**/test_*.py" = ["S101"] # Allow assert statements in test files
+"**/__init__.py" = ["F401"] # Allow unused imports in __init__ files (re-exports)
 
 [tool.pytest.ini_options]
 testpaths = [
diff --git a/uv.lock b/uv.lock
index 6f899d94d..41b0df2b3 100644
--- a/uv.lock
+++ b/uv.lock
@@ -188,21 +188,6 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/d0/f7/a2799bf017d0303bb2f6c10f55f9c85619a0c8b9cf77fb8a9579961bfe88/aisuite-0.1.11-py3-none-any.whl", hash = "sha256:14293e9b7d81268dabe9b1cbb41cab64ca6c0272b52166213a7fa80196140d7c", size = 41222, upload-time = "2025-03-26T12:04:42.472Z" },
 ]
 
-[[package]]
-name = "alembic"
-version = "1.16.2"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
-    { name = "mako" },
-    { name = "sqlalchemy" },
-    { name = "tomli", marker = "python_full_version < '3.11'" },
-    { name = "typing-extensions" },
-]
-sdist
= { url = "https://files.pythonhosted.org/packages/9c/35/116797ff14635e496bbda0c168987f5326a6555b09312e9b817e360d1f56/alembic-1.16.2.tar.gz", hash = "sha256:e53c38ff88dadb92eb22f8b150708367db731d58ad7e9d417c9168ab516cbed8", size = 1963563, upload-time = "2025-06-16T18:05:08.566Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/dd/e2/88e425adac5ad887a087c38d04fe2030010572a3e0e627f8a6e8c33eeda8/alembic-1.16.2-py3-none-any.whl", hash = "sha256:5f42e9bd0afdbd1d5e3ad856c01754530367debdebf21ed6894e34af52b3bb03", size = 242717, upload-time = "2025-06-16T18:05:10.27Z" }, -] - [[package]] name = "annotated-types" version = "0.7.0" @@ -757,26 +742,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/85/32/10bb5764d90a8eee674e9dc6f4db6a0ab47c8c4d0d83c27f7c39ac415a4d/click-8.2.1-py3-none-any.whl", hash = "sha256:61a3265b914e850b85317d0b3109c7f8cd35a670f963866005d6ef1d5175a12b", size = 102215, upload-time = "2025-05-20T23:19:47.796Z" }, ] -[[package]] -name = "cohere" -version = "5.15.0" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "fastavro" }, - { name = "httpx" }, - { name = "httpx-sse" }, - { name = "pydantic" }, - { name = "pydantic-core" }, - { name = "requests" }, - { name = "tokenizers" }, - { name = "types-requests" }, - { name = "typing-extensions" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/a1/33/69c7d1b25a20eafef4197a1444c7f87d5241e936194e54876ea8996157e6/cohere-5.15.0.tar.gz", hash = "sha256:e802d4718ddb0bb655654382ebbce002756a3800faac30296cde7f1bdc6ff2cc", size = 135021, upload-time = "2025-04-15T13:39:51.404Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c7/87/94694db7fe6df979fbc03286eaabdfa98f1c8fa532960e5afdf965e10960/cohere-5.15.0-py3-none-any.whl", hash = "sha256:22ff867c2a6f2fc2b585360c6072f584f11f275ef6d9242bac24e0fa2df1dfb5", size = 259522, upload-time = "2025-04-15T13:39:49.498Z" }, -] - [[package]] name = "colorama" version = "0.4.6" @@ -1211,13 +1176,19 @@ provides-extras = ["aisuite", "docling", "embeddings", "mem0", "openpyxl", "pand name = "crewai-tools" source = { editable = "packages/tools" } dependencies = [ + { name = "beautifulsoup4" }, + { name = "click" }, { name = "crewai-core" }, { name = "docker" }, { name = "lancedb" }, + { name = "portalocker" }, + { name = "pypdf" }, + { name = "python-docx" }, { name = "pytube" }, { name = "requests" }, { name = "stagehand" }, { name = "tiktoken" }, + { name = "youtube-transcript-api" }, ] [package.optional-dependencies] @@ -1249,9 +1220,6 @@ couchbase = [ databricks-sdk = [ { name = "databricks-sdk" }, ] -embedchain = [ - { name = "embedchain" }, -] exa-py = [ { name = "exa-py" }, ] @@ -1278,12 +1246,18 @@ mongodb = [ multion = [ { name = "multion" }, ] +mysql = [ + { name = "pymysql" }, +] oxylabs = [ { name = "oxylabs" }, ] patronus = [ { name = "patronus" }, ] +postgresql = [ + { name = "psycopg2-binary" }, +] qdrant-client = [ { name = "qdrant-client" }, ] @@ -1332,12 +1306,23 @@ xml = [ { name = "unstructured", extra = ["all-docs", "local-inference"] }, ] +[package.dev-dependencies] +dev = [ + { name = "mypy" }, + { name = "pytest" }, + { name = "pytest-asyncio" }, + { name = "pytest-recording" }, + { name = "ruff" }, +] + [package.metadata] requires-dist = [ + { name = "beautifulsoup4", specifier = ">=4.13.4" }, { name = "beautifulsoup4", marker = "extra == 'beautifulsoup4'", specifier = ">=4.12.3" }, { name = "beautifulsoup4", marker = "extra == 'bedrock'", specifier = ">=4.13.4" }, { name = "bedrock-agentcore", 
marker = "extra == 'bedrock'", specifier = ">=0.1.0" }, { name = "browserbase", marker = "extra == 'browserbase'", specifier = ">=1.0.5" }, + { name = "click", specifier = ">=8.1.8" }, { name = "composio-core", marker = "extra == 'composio-core'", specifier = ">=0.6.11.post1" }, { name = "contextual-client", marker = "extra == 'contextual'", specifier = ">=0.1.0" }, { name = "couchbase", marker = "extra == 'couchbase'", specifier = ">=4.3.5" }, @@ -1345,7 +1330,6 @@ requires-dist = [ { name = "cryptography", marker = "extra == 'snowflake'", specifier = ">=43.0.3" }, { name = "databricks-sdk", marker = "extra == 'databricks-sdk'", specifier = ">=0.46.0" }, { name = "docker", specifier = ">=7.1.0" }, - { name = "embedchain", marker = "extra == 'embedchain'", specifier = ">=0.1.114" }, { name = "exa-py", marker = "extra == 'exa-py'", specifier = ">=1.8.7" }, { name = "firecrawl-py", marker = "extra == 'firecrawl-py'", specifier = ">=1.8.0" }, { name = "gitpython", marker = "extra == 'github'", specifier = "==3.1.38" }, @@ -1362,8 +1346,13 @@ requires-dist = [ { name = "oxylabs", marker = "extra == 'oxylabs'", specifier = "==2.0.0" }, { name = "patronus", marker = "extra == 'patronus'", specifier = ">=0.0.16" }, { name = "playwright", marker = "extra == 'bedrock'", specifier = ">=1.52.0" }, + { name = "portalocker", specifier = "==2.7.0" }, + { name = "psycopg2-binary", marker = "extra == 'postgresql'", specifier = ">=2.9.10" }, { name = "pygithub", marker = "extra == 'github'", specifier = "==1.59.1" }, { name = "pymongo", marker = "extra == 'mongodb'", specifier = ">=4.13" }, + { name = "pymysql", marker = "extra == 'mysql'", specifier = ">=1.1.1" }, + { name = "pypdf", specifier = ">=5.9.0" }, + { name = "python-docx", specifier = ">=1.2.0" }, { name = "python-docx", marker = "extra == 'rag'", specifier = ">=1.1.0" }, { name = "pytube", specifier = ">=15.0.0" }, { name = "qdrant-client", marker = "extra == 'qdrant-client'", specifier = ">=1.12.1" }, @@ -1384,8 +1373,18 @@ requires-dist = [ { name = "tiktoken", specifier = ">=0.8.0" }, { name = "unstructured", extras = ["all-docs", "local-inference"], marker = "extra == 'xml'", specifier = ">=0.17.2" }, { name = "weaviate-client", marker = "extra == 'weaviate-client'", specifier = ">=4.10.2" }, + { name = "youtube-transcript-api", specifier = ">=1.2.2" }, +] +provides-extras = ["apify", "beautifulsoup4", "bedrock", "browserbase", "composio-core", "contextual", "couchbase", "databricks-sdk", "exa-py", "firecrawl-py", "github", "hyperbrowser", "linkup-sdk", "mcp", "mongodb", "multion", "mysql", "oxylabs", "patronus", "postgresql", "qdrant-client", "rag", "scrapegraph-py", "scrapfly-sdk", "selenium", "serpapi", "singlestore", "snowflake", "spider-client", "sqlalchemy", "stagehand", "tavily-python", "weaviate-client", "xml"] + +[package.metadata.requires-dev] +dev = [ + { name = "mypy", specifier = ">=1.18.1" }, + { name = "pytest", specifier = ">=8.0.0" }, + { name = "pytest-asyncio", specifier = ">=0.25.2" }, + { name = "pytest-recording", specifier = ">=0.13.3" }, + { name = "ruff", specifier = ">=0.13.0" }, ] -provides-extras = ["apify", "beautifulsoup4", "bedrock", "browserbase", "composio-core", "contextual", "couchbase", "databricks-sdk", "embedchain", "exa-py", "firecrawl-py", "github", "hyperbrowser", "linkup-sdk", "mcp", "mongodb", "multion", "oxylabs", "patronus", "qdrant-client", "rag", "scrapegraph-py", "scrapfly-sdk", "selenium", "serpapi", "singlestore", "snowflake", "spider-client", "sqlalchemy", "stagehand", "tavily-python", 
"weaviate-client", "xml"] [[package]] name = "cryptography" @@ -1478,6 +1477,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/4e/8c/f3147f5c4b73e7550fe5f9352eaa956ae838d5c51eb58e7a25b9f3e2643b/decorator-5.2.1-py3-none-any.whl", hash = "sha256:d316bb415a2d9e2d2b3abcc4084c6502fc09240e292cd76a76afc106a1c8e04a", size = 9190, upload-time = "2025-02-24T04:41:32.565Z" }, ] +[[package]] +name = "defusedxml" +version = "0.7.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/0f/d5/c66da9b79e5bdb124974bfe172b4daf3c984ebd9c2a06e2b8a4dc7331c72/defusedxml-0.7.1.tar.gz", hash = "sha256:1bb3032db185915b62d7c6209c5a8792be6a32ab2fedacc84e01b52c51aa3e69", size = 75520, upload-time = "2021-03-08T10:59:26.269Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/07/6c/aa3f2f849e01cb6a001cd8554a88d4c77c5c1a31c95bdf1cf9301e6d9ef4/defusedxml-0.7.1-py2.py3-none-any.whl", hash = "sha256:a352e7e428770286cc899e2542b6cdaedb2b4953ff269a210103ec58f6198a61", size = 25604, upload-time = "2021-03-08T10:59:24.45Z" }, +] + [[package]] name = "deprecated" version = "1.2.18" @@ -1735,35 +1743,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9c/13/563119fe0af82aca5a3b89399c435953072c39515c2e818eb82793955c3b/effdet-0.4.1-py3-none-any.whl", hash = "sha256:10889a226228d515c948e3fcf811e64c0d78d7aa94823a300045653b9c284cb7", size = 112513, upload-time = "2023-05-21T22:17:58.47Z" }, ] -[[package]] -name = "embedchain" -version = "0.1.128" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "alembic" }, - { name = "beautifulsoup4" }, - { name = "chromadb" }, - { name = "gptcache" }, - { name = "langchain" }, - { name = "langchain-cohere" }, - { name = "langchain-community" }, - { name = "langchain-openai" }, - { name = "langsmith" }, - { name = "mem0ai" }, - { name = "openai" }, - { name = "posthog" }, - { name = "pypdf" }, - { name = "pysbd" }, - { name = "python-dotenv" }, - { name = "rich" }, - { name = "schema" }, - { name = "sqlalchemy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/4f/18/63cdd649ff26dcc532f78ccfc565ee9b25fad2abcf82f04456f5ae7580d9/embedchain-0.1.128.tar.gz", hash = "sha256:641cef036d4c2f4b2b2d26019156647dcaa1b6d98f5d16e3798a12f46499bb8f", size = 118754, upload-time = "2025-03-25T07:49:02.112Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ce/e2/6806c26fa402e47f2924bac64d11a1af3c426b1d967588067f23325fd417/embedchain-0.1.128-py3-none-any.whl", hash = "sha256:380e848c053a335b06d535efcbfdc6b98a5d0b2a6a1f553aae94cb1c85676183", size = 211343, upload-time = "2025-03-25T07:49:00.349Z" }, -] - [[package]] name = "emoji" version = "2.14.1" @@ -1851,43 +1830,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/53/50/b1222562c6d270fea83e9c9075b8e8600b8479150a18e4516a6138b980d1/fastapi-0.115.14-py3-none-any.whl", hash = "sha256:6c0c8bf9420bd58f565e585036d971872472b4f7d3f6c73b698e10cffdefb3ca", size = 95514, upload-time = "2025-06-26T15:29:06.49Z" }, ] -[[package]] -name = "fastavro" -version = "1.11.1" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/48/8f/32664a3245247b13702d13d2657ea534daf64e58a3f72a3a2d10598d6916/fastavro-1.11.1.tar.gz", hash = "sha256:bf6acde5ee633a29fb8dfd6dfea13b164722bc3adc05a0e055df080549c1c2f8", size = 1016250, upload-time = "2025-05-18T04:54:31.413Z" } -wheels = [ - { url = 
"https://files.pythonhosted.org/packages/ae/be/53df3fec7fdabc1848896a76afb0f01ab96b58abb29611aa68a994290167/fastavro-1.11.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:603aa1c1d1be21fb4bcb63e1efb0711a9ddb337de81391c32dac95c6e0dacfcc", size = 944225, upload-time = "2025-05-18T04:54:34.586Z" }, - { url = "https://files.pythonhosted.org/packages/d0/cc/c7c76a082fbf5aaaf82ab7da7b9ede6fc99eb8f008c084c67d230b29c446/fastavro-1.11.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:45653b312d4ce297e2bd802ea3ffd17ecbe718e5e8b6e2ae04cd72cb50bb99d5", size = 3105189, upload-time = "2025-05-18T04:54:36.855Z" }, - { url = "https://files.pythonhosted.org/packages/48/ff/5f1f0b5e3835e788ba8121d6dd6426cd4c6e58ce1bff02cb7810278648b0/fastavro-1.11.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:998a53fc552e6bee9acda32af258f02557313c85fb5b48becba5b71ec82f421e", size = 3113124, upload-time = "2025-05-18T04:54:40.013Z" }, - { url = "https://files.pythonhosted.org/packages/e5/b8/1ac01433b55460dabeb6d3fbb05ba1c971d57137041e8f53b2e9f46cd033/fastavro-1.11.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:9f878c9ad819467120cb066f1c73496c42eb24ecdd7c992ec996f465ef4cedad", size = 3155196, upload-time = "2025-05-18T04:54:42.307Z" }, - { url = "https://files.pythonhosted.org/packages/5e/a8/66e599b946ead031a5caba12772e614a7802d95476e8732e2e9481369973/fastavro-1.11.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:da9e4c231ac4951092c2230ca423d8a3f2966718f072ac1e2c5d2d44c70b2a50", size = 3229028, upload-time = "2025-05-18T04:54:44.503Z" }, - { url = "https://files.pythonhosted.org/packages/0e/e7/17c35e2dfe8a9e4f3735eabdeec366b0edc4041bb1a84fcd528c8efd12af/fastavro-1.11.1-cp310-cp310-win_amd64.whl", hash = "sha256:7423bfad3199567eeee7ad6816402c7c0ee1658b959e8c10540cfbc60ce96c2a", size = 449177, upload-time = "2025-05-18T04:54:46.127Z" }, - { url = "https://files.pythonhosted.org/packages/8e/63/f33d6fd50d8711f305f07ad8c7b4a25f2092288f376f484c979dcf277b07/fastavro-1.11.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:3573340e4564e8962e22f814ac937ffe0d4be5eabbd2250f77738dc47e3c8fe9", size = 957526, upload-time = "2025-05-18T04:54:47.701Z" }, - { url = "https://files.pythonhosted.org/packages/f4/09/a57ad9d8cb9b8affb2e43c29d8fb8cbdc0f1156f8496067a0712c944bacc/fastavro-1.11.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:7291cf47735b8bd6ff5d9b33120e6e0974f52fd5dff90cd24151b22018e7fd29", size = 3322808, upload-time = "2025-05-18T04:54:50.419Z" }, - { url = "https://files.pythonhosted.org/packages/86/70/d6df59309d3754d6d4b0c7beca45b9b1a957d6725aed8da3aca247db3475/fastavro-1.11.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:bf3bb065d657d5bac8b2cb39945194aa086a9b3354f2da7f89c30e4dc20e08e2", size = 3330870, upload-time = "2025-05-18T04:54:52.406Z" }, - { url = "https://files.pythonhosted.org/packages/ad/ea/122315154d2a799a2787058435ef0d4d289c0e8e575245419436e9b702ca/fastavro-1.11.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:8758317c85296b848698132efb13bc44a4fbd6017431cc0f26eaeb0d6fa13d35", size = 3343369, upload-time = "2025-05-18T04:54:54.652Z" }, - { url = "https://files.pythonhosted.org/packages/62/12/7800de5fec36d55a818adf3db3b085b1a033c4edd60323cf6ca0754cf8cb/fastavro-1.11.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:ad99d57228f83bf3e2214d183fbf6e2fda97fd649b2bdaf8e9110c36cbb02624", size = 3430629, upload-time = "2025-05-18T04:54:56.513Z" }, - { url = 
"https://files.pythonhosted.org/packages/48/65/2b74ccfeba9dcc3f7dbe64907307386b4a0af3f71d2846f63254df0f1e1d/fastavro-1.11.1-cp311-cp311-win_amd64.whl", hash = "sha256:9134090178bdbf9eefd467717ced3dc151e27a7e7bfc728260ce512697efe5a4", size = 451621, upload-time = "2025-05-18T04:54:58.156Z" }, - { url = "https://files.pythonhosted.org/packages/99/58/8e789b0a2f532b22e2d090c20d27c88f26a5faadcba4c445c6958ae566cf/fastavro-1.11.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:e8bc238f2637cd5d15238adbe8fb8c58d2e6f1870e0fb28d89508584670bae4b", size = 939583, upload-time = "2025-05-18T04:54:59.853Z" }, - { url = "https://files.pythonhosted.org/packages/34/3f/02ed44742b1224fe23c9fc9b9b037fc61769df716c083cf80b59a02b9785/fastavro-1.11.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b403933081c83fc4d8a012ee64b86e560a024b1280e3711ee74f2abc904886e8", size = 3257734, upload-time = "2025-05-18T04:55:02.366Z" }, - { url = "https://files.pythonhosted.org/packages/cc/bc/9cc8b19eeee9039dd49719f8b4020771e805def262435f823fa8f27ddeea/fastavro-1.11.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3f6ecb4b5f77aa756d973b7dd1c2fb4e4c95b4832a3c98b059aa96c61870c709", size = 3318218, upload-time = "2025-05-18T04:55:04.352Z" }, - { url = "https://files.pythonhosted.org/packages/39/77/3b73a986606494596b6d3032eadf813a05b59d1623f54384a23de4217d5f/fastavro-1.11.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:059893df63ef823b0231b485c9d43016c7e32850cae7bf69f4e9d46dd41c28f2", size = 3297296, upload-time = "2025-05-18T04:55:06.175Z" }, - { url = "https://files.pythonhosted.org/packages/8e/1c/b69ceef6494bd0df14752b5d8648b159ad52566127bfd575e9f5ecc0c092/fastavro-1.11.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:5120ffc9a200699218e01777e695a2f08afb3547ba818184198c757dc39417bd", size = 3438056, upload-time = "2025-05-18T04:55:08.276Z" }, - { url = "https://files.pythonhosted.org/packages/ef/11/5c2d0db3bd0e6407546fabae9e267bb0824eacfeba79e7dd81ad88afa27d/fastavro-1.11.1-cp312-cp312-win_amd64.whl", hash = "sha256:7bb9d0d2233f33a52908b6ea9b376fe0baf1144bdfdfb3c6ad326e200a8b56b0", size = 442824, upload-time = "2025-05-18T04:55:10.385Z" }, - { url = "https://files.pythonhosted.org/packages/ec/08/8e25b9e87a98f8c96b25e64565fa1a1208c0095bb6a84a5c8a4b925688a5/fastavro-1.11.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:f963b8ddaf179660e814ab420850c1b4ea33e2ad2de8011549d958b21f77f20a", size = 931520, upload-time = "2025-05-18T04:55:11.614Z" }, - { url = "https://files.pythonhosted.org/packages/02/ee/7cf5561ef94781ed6942cee6b394a5e698080f4247f00f158ee396ec244d/fastavro-1.11.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0253e5b6a3c9b62fae9fc3abd8184c5b64a833322b6af7d666d3db266ad879b5", size = 3195989, upload-time = "2025-05-18T04:55:13.732Z" }, - { url = "https://files.pythonhosted.org/packages/b3/31/f02f097d79f090e5c5aca8a743010c4e833a257c0efdeb289c68294f7928/fastavro-1.11.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca637b150e1f4c0e8e564fad40a16bd922bcb7ffd1a6e4836e6084f2c4f4e8db", size = 3239755, upload-time = "2025-05-18T04:55:16.463Z" }, - { url = "https://files.pythonhosted.org/packages/09/4c/46626b4ee4eb8eb5aa7835973c6ba8890cf082ef2daface6071e788d2992/fastavro-1.11.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:76af1709031621828ca6ce7f027f7711fa33ac23e8269e7a5733996ff8d318da", size = 3243788, upload-time = "2025-05-18T04:55:18.544Z" }, - { url = 
"https://files.pythonhosted.org/packages/a7/6f/8ed42524e9e8dc0554f0f211dd1c6c7a9dde83b95388ddcf7c137e70796f/fastavro-1.11.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:8224e6d8d9864d4e55dafbe88920d6a1b8c19cc3006acfac6aa4f494a6af3450", size = 3378330, upload-time = "2025-05-18T04:55:20.887Z" }, - { url = "https://files.pythonhosted.org/packages/b8/51/38cbe243d5facccab40fc43a4c17db264c261be955ce003803d25f0da2c3/fastavro-1.11.1-cp313-cp313-win_amd64.whl", hash = "sha256:cde7ed91b52ff21f0f9f157329760ba7251508ca3e9618af3ffdac986d9faaa2", size = 443115, upload-time = "2025-05-18T04:55:22.107Z" }, - { url = "https://files.pythonhosted.org/packages/d0/57/0d31ed1a49c65ad9f0f0128d9a928972878017781f9d4336f5f60982334c/fastavro-1.11.1-cp313-cp313t-macosx_10_13_universal2.whl", hash = "sha256:e5ed1325c1c414dd954e7a2c5074daefe1eceb672b8c727aa030ba327aa00693", size = 1021401, upload-time = "2025-05-18T04:55:23.431Z" }, - { url = "https://files.pythonhosted.org/packages/56/7a/a3f1a75fbfc16b3eff65dc0efcdb92364967923194312b3f8c8fc2cb95be/fastavro-1.11.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8cd3c95baeec37188899824faf44a5ee94dfc4d8667b05b2f867070c7eb174c4", size = 3384349, upload-time = "2025-05-18T04:55:25.575Z" }, - { url = "https://files.pythonhosted.org/packages/be/84/02bceb7518867df84027232a75225db758b9b45f12017c9743f45b73101e/fastavro-1.11.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2e0babcd81acceb4c60110af9efa25d890dbb68f7de880f806dadeb1e70fe413", size = 3240658, upload-time = "2025-05-18T04:55:27.633Z" }, - { url = "https://files.pythonhosted.org/packages/f2/17/508c846c644d39bc432b027112068b8e96e7560468304d4c0757539dd73a/fastavro-1.11.1-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:b2c0cb8063c7208b53b6867983dc6ae7cc80b91116b51d435d2610a5db2fc52f", size = 3372809, upload-time = "2025-05-18T04:55:30.063Z" }, - { url = "https://files.pythonhosted.org/packages/fe/84/9c2917a70ed570ddbfd1d32ac23200c1d011e36c332e59950d2f6d204941/fastavro-1.11.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:1bc2824e9969c04ab6263d269a1e0e5d40b9bd16ade6b70c29d6ffbc4f3cc102", size = 3387171, upload-time = "2025-05-18T04:55:32.531Z" }, -] - [[package]] name = "fastembed" version = "0.7.1" @@ -2198,21 +2140,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/86/f1/62a193f0227cf15a920390abe675f386dec35f7ae3ffe6da582d3ade42c7/googleapis_common_protos-1.70.0-py3-none-any.whl", hash = "sha256:b8bfcca8c25a2bb253e0e0b0adaf8c00773e5e6af6fd92397576680b807e0fd8", size = 294530, upload-time = "2025-04-14T10:17:01.271Z" }, ] -[[package]] -name = "gptcache" -version = "0.1.44" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cachetools" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.3.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "requests" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/27/73/5cc20749e06017044106837550384f5d8ed00b8e9570689f17e7292e2d23/gptcache-0.1.44.tar.gz", hash = "sha256:d3d5e6a75c57594dc58212c2d6c53a7999c23ede30e0be66d213d885c0ad0be9", size = 95969, upload-time = "2024-08-01T11:26:33.601Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/49/87/8dde0a3757bc207805f751b47878888b09db4a464ae48a55f386f091b488/gptcache-0.1.44-py3-none-any.whl", hash = 
"sha256:11ddd63b173dc3822b8c2eb7588ea947c825845ed0737b043038a238286bfec4", size = 131634, upload-time = "2024-08-01T11:26:27.449Z" }, -] - [[package]] name = "greenlet" version = "3.2.3" @@ -3106,25 +3033,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/16/db/9a31008d1c19f628efea0bb32a5420f915dda732df59987a94f523d1fd6f/lancedb-0.24.0-cp39-abi3-win_amd64.whl", hash = "sha256:5f1c22d86f0bd1e5dad3744ac28b02eafec89c83a0627903f840a1a7c77d785f", size = 35075123, upload-time = "2025-06-20T06:26:18.24Z" }, ] -[[package]] -name = "langchain" -version = "0.3.26" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "async-timeout", marker = "python_full_version < '3.11'" }, - { name = "langchain-core" }, - { name = "langchain-text-splitters" }, - { name = "langsmith" }, - { name = "pydantic" }, - { name = "pyyaml" }, - { name = "requests" }, - { name = "sqlalchemy" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/7f/13/a9931800ee42bbe0f8850dd540de14e80dda4945e7ee36e20b5d5964286e/langchain-0.3.26.tar.gz", hash = "sha256:8ff034ee0556d3e45eff1f1e96d0d745ced57858414dba7171c8ebdbeb5580c9", size = 10226808, upload-time = "2025-06-20T22:23:01.174Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/f1/f2/c09a2e383283e3af1db669ab037ac05a45814f4b9c472c48dc24c0cef039/langchain-0.3.26-py3-none-any.whl", hash = "sha256:361bb2e61371024a8c473da9f9c55f4ee50f269c5ab43afdb2b1309cb7ac36cf", size = 1012336, upload-time = "2025-06-20T22:22:58.874Z" }, -] - [[package]] name = "langchain-apify" version = "0.1.4" @@ -3139,47 +3047,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c5/dc/cc67014b6c5e74486c4bca18a78d395b9f308074ff9b6745a0bbf7a64d27/langchain_apify-0.1.4-py3-none-any.whl", hash = "sha256:06a36685d14eabefce2d7cc6bfdd0b76dd537b42b587c1a9fd6b79044a6bd6e1", size = 16477, upload-time = "2025-08-19T18:43:39.537Z" }, ] -[[package]] -name = "langchain-cohere" -version = "0.3.5" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "cohere" }, - { name = "langchain-core" }, - { name = "langchain-experimental" }, - { name = "pandas" }, - { name = "pydantic" }, - { name = "tabulate" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/8a/24/d0ab0875b32a540898b74005860adea5c68779e445ab438b1f3222af15e3/langchain_cohere-0.3.5.tar.gz", hash = "sha256:1b397921c23696b2a11121dfbc4298bbf9a27690052cf72b2675c91594747534", size = 37570, upload-time = "2025-01-21T17:38:33.554Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/9e/64/85c21febec4ee102a9f561a1b0d6298a9d64b1a032f352510a02af0ec7c4/langchain_cohere-0.3.5-py3-none-any.whl", hash = "sha256:ff71e6a19b99f8c08b185e16408259dda55c078258cbe99acc222085ce0223bc", size = 45091, upload-time = "2025-01-21T17:38:31.738Z" }, -] - -[[package]] -name = "langchain-community" -version = "0.3.27" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "aiohttp" }, - { name = "dataclasses-json" }, - { name = "httpx-sse" }, - { name = "langchain" }, - { name = "langchain-core" }, - { name = "langsmith" }, - { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" }, - { name = "numpy", version = "2.3.1", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" }, - { name = "pydantic-settings" }, - { name = "pyyaml" }, - { name = "requests" }, - { name = "sqlalchemy" }, - { name = "tenacity" }, -] -sdist = { url = 
"https://files.pythonhosted.org/packages/5c/76/200494f6de488217a196c4369e665d26b94c8c3642d46e2fd62f9daf0a3a/langchain_community-0.3.27.tar.gz", hash = "sha256:e1037c3b9da0c6d10bf06e838b034eb741e016515c79ef8f3f16e53ead33d882", size = 33237737, upload-time = "2025-07-02T18:47:02.329Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/c8/bc/f8c7dae8321d37ed39ac9d7896617c4203248240a4835b136e3724b3bb62/langchain_community-0.3.27-py3-none-any.whl", hash = "sha256:581f97b795f9633da738ea95da9cb78f8879b538090c9b7a68c0aed49c828f0d", size = 2530442, upload-time = "2025-07-02T18:47:00.246Z" }, -] - [[package]] name = "langchain-core" version = "0.3.67" @@ -3198,45 +3065,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9f/2b/a0d283089c6d08c12d47dca39a55029ff714e939ec04f4560420426ab613/langchain_core-0.3.67-py3-none-any.whl", hash = "sha256:b699f1f24b24fa2747c05e2daa280aa64478a51e01a4e82c7f8e20b6167dfa99", size = 440237, upload-time = "2025-06-30T17:09:33.323Z" }, ] -[[package]] -name = "langchain-experimental" -version = "0.3.4" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "langchain-community" }, - { name = "langchain-core" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/27/56/a8acbb08a03383c28875b3b151e4cefea5612266917fbd6fc3c14c21e172/langchain_experimental-0.3.4.tar.gz", hash = "sha256:937c4259ee4a639c618d19acf0e2c5c2898ef127050346edc5655259aa281a21", size = 140532, upload-time = "2024-12-20T15:16:09.42Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b2/27/fe8caa4884611286b1f7d6c5cfd76e1fef188faaa946db4fde6daa1cd2cd/langchain_experimental-0.3.4-py3-none-any.whl", hash = "sha256:2e587306aea36b60fa5e5fc05dc7281bee9f60a806f0bf9d30916e0ee096af80", size = 209154, upload-time = "2024-12-20T15:16:07.006Z" }, -] - -[[package]] -name = "langchain-openai" -version = "0.2.14" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "langchain-core" }, - { name = "openai" }, - { name = "tiktoken" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e5/fd/8256eba9a159f95a13c5bf7f1f49683de93b3876585b768e6be5dc3a5765/langchain_openai-0.2.14.tar.gz", hash = "sha256:7a514f309e356b182a337c0ed36ab3fbe34d9834a235a3b85cb7f91ae775d978", size = 43647, upload-time = "2024-12-19T21:51:40.629Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ed/54/63c8264d7dbc3bf31ba61bf97740fdd76386b2d4f9a58f58afd3961ce7d7/langchain_openai-0.2.14-py3-none-any.whl", hash = "sha256:d232496662f79ece9a11caf7d798ba863e559c771bc366814f7688e0fe664fe8", size = 50876, upload-time = "2024-12-19T21:51:39.3Z" }, -] - -[[package]] -name = "langchain-text-splitters" -version = "0.3.8" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "langchain-core" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/e7/ac/b4a25c5716bb0103b1515f1f52cc69ffb1035a5a225ee5afe3aed28bf57b/langchain_text_splitters-0.3.8.tar.gz", hash = "sha256:116d4b9f2a22dda357d0b79e30acf005c5518177971c66a9f1ab0edfdb0f912e", size = 42128, upload-time = "2025-04-04T14:03:51.521Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/8b/a3/3696ff2444658053c01b6b7443e761f28bb71217d82bb89137a978c5f66f/langchain_text_splitters-0.3.8-py3-none-any.whl", hash = "sha256:e75cc0f4ae58dcf07d9f18776400cf8ade27fadd4ff6d264df6278bb302f6f02", size = 32440, upload-time = "2025-04-04T14:03:50.6Z" }, -] - [[package]] name = "langdetect" version = "1.0.9" @@ -3415,18 +3243,6 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/6b/40/7d49ff503cc90b03253eba0768feec909b47ce92a90591b025c774a29a95/lxml-5.3.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:0a006390834603e5952a2ff74b9a31a6007c7cc74282a087aa6467afb4eea987", size = 3487898, upload-time = "2025-04-05T18:30:55.122Z" }, ] -[[package]] -name = "mako" -version = "1.3.10" -source = { registry = "https://pypi.org/simple" } -dependencies = [ - { name = "markupsafe" }, -] -sdist = { url = "https://files.pythonhosted.org/packages/9e/38/bd5b78a920a64d708fe6bc8e0a2c075e1389d53bef8413725c63ba041535/mako-1.3.10.tar.gz", hash = "sha256:99579a6f39583fa7e5630a28c3c1f440e4e97a414b80372649c0ce338da2ea28", size = 392474, upload-time = "2025-04-10T12:44:31.16Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/87/fb/99f81ac72ae23375f22b7afdb7642aba97c00a713c217124420147681a2f/mako-1.3.10-py3-none-any.whl", hash = "sha256:baef24a52fc4fc514a0887ac600f9f1cff3d82c61d4d700a1fa84d597b88db59", size = 78509, upload-time = "2025-04-10T12:50:53.297Z" }, -] - [[package]] name = "markdown" version = "3.8.2" @@ -4048,7 +3864,7 @@ wheels = [ [[package]] name = "mypy" -version = "1.17.1" +version = "1.18.2" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "mypy-extensions" }, @@ -4056,33 +3872,33 @@ dependencies = [ { name = "tomli", marker = "python_full_version < '3.11'" }, { name = "typing-extensions" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/8e/22/ea637422dedf0bf36f3ef238eab4e455e2a0dcc3082b5cc067615347ab8e/mypy-1.17.1.tar.gz", hash = "sha256:25e01ec741ab5bb3eec8ba9cdb0f769230368a22c959c4937360efb89b7e9f01", size = 3352570, upload-time = "2025-07-31T07:54:19.204Z" } +sdist = { url = "https://files.pythonhosted.org/packages/c0/77/8f0d0001ffad290cef2f7f216f96c814866248a0b92a722365ed54648e7e/mypy-1.18.2.tar.gz", hash = "sha256:06a398102a5f203d7477b2923dda3634c36727fa5c237d8f859ef90c42a9924b", size = 3448846, upload-time = "2025-09-19T00:11:10.519Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/77/a9/3d7aa83955617cdf02f94e50aab5c830d205cfa4320cf124ff64acce3a8e/mypy-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:3fbe6d5555bf608c47203baa3e72dbc6ec9965b3d7c318aa9a4ca76f465bd972", size = 11003299, upload-time = "2025-07-31T07:54:06.425Z" }, - { url = "https://files.pythonhosted.org/packages/83/e8/72e62ff837dd5caaac2b4a5c07ce769c8e808a00a65e5d8f94ea9c6f20ab/mypy-1.17.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:80ef5c058b7bce08c83cac668158cb7edea692e458d21098c7d3bce35a5d43e7", size = 10125451, upload-time = "2025-07-31T07:53:52.974Z" }, - { url = "https://files.pythonhosted.org/packages/7d/10/f3f3543f6448db11881776f26a0ed079865926b0c841818ee22de2c6bbab/mypy-1.17.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c4a580f8a70c69e4a75587bd925d298434057fe2a428faaf927ffe6e4b9a98df", size = 11916211, upload-time = "2025-07-31T07:53:18.879Z" }, - { url = "https://files.pythonhosted.org/packages/06/bf/63e83ed551282d67bb3f7fea2cd5561b08d2bb6eb287c096539feb5ddbc5/mypy-1.17.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dd86bb649299f09d987a2eebb4d52d10603224500792e1bee18303bbcc1ce390", size = 12652687, upload-time = "2025-07-31T07:53:30.544Z" }, - { url = "https://files.pythonhosted.org/packages/69/66/68f2eeef11facf597143e85b694a161868b3b006a5fbad50e09ea117ef24/mypy-1.17.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = 
"sha256:a76906f26bd8d51ea9504966a9c25419f2e668f012e0bdf3da4ea1526c534d94", size = 12896322, upload-time = "2025-07-31T07:53:50.74Z" }, - { url = "https://files.pythonhosted.org/packages/a3/87/8e3e9c2c8bd0d7e071a89c71be28ad088aaecbadf0454f46a540bda7bca6/mypy-1.17.1-cp310-cp310-win_amd64.whl", hash = "sha256:e79311f2d904ccb59787477b7bd5d26f3347789c06fcd7656fa500875290264b", size = 9507962, upload-time = "2025-07-31T07:53:08.431Z" }, - { url = "https://files.pythonhosted.org/packages/46/cf/eadc80c4e0a70db1c08921dcc220357ba8ab2faecb4392e3cebeb10edbfa/mypy-1.17.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ad37544be07c5d7fba814eb370e006df58fed8ad1ef33ed1649cb1889ba6ff58", size = 10921009, upload-time = "2025-07-31T07:53:23.037Z" }, - { url = "https://files.pythonhosted.org/packages/5d/c1/c869d8c067829ad30d9bdae051046561552516cfb3a14f7f0347b7d973ee/mypy-1.17.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:064e2ff508e5464b4bd807a7c1625bc5047c5022b85c70f030680e18f37273a5", size = 10047482, upload-time = "2025-07-31T07:53:26.151Z" }, - { url = "https://files.pythonhosted.org/packages/98/b9/803672bab3fe03cee2e14786ca056efda4bb511ea02dadcedde6176d06d0/mypy-1.17.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:70401bbabd2fa1aa7c43bb358f54037baf0586f41e83b0ae67dd0534fc64edfd", size = 11832883, upload-time = "2025-07-31T07:53:47.948Z" }, - { url = "https://files.pythonhosted.org/packages/88/fb/fcdac695beca66800918c18697b48833a9a6701de288452b6715a98cfee1/mypy-1.17.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:e92bdc656b7757c438660f775f872a669b8ff374edc4d18277d86b63edba6b8b", size = 12566215, upload-time = "2025-07-31T07:54:04.031Z" }, - { url = "https://files.pythonhosted.org/packages/7f/37/a932da3d3dace99ee8eb2043b6ab03b6768c36eb29a02f98f46c18c0da0e/mypy-1.17.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:c1fdf4abb29ed1cb091cf432979e162c208a5ac676ce35010373ff29247bcad5", size = 12751956, upload-time = "2025-07-31T07:53:36.263Z" }, - { url = "https://files.pythonhosted.org/packages/8c/cf/6438a429e0f2f5cab8bc83e53dbebfa666476f40ee322e13cac5e64b79e7/mypy-1.17.1-cp311-cp311-win_amd64.whl", hash = "sha256:ff2933428516ab63f961644bc49bc4cbe42bbffb2cd3b71cc7277c07d16b1a8b", size = 9507307, upload-time = "2025-07-31T07:53:59.734Z" }, - { url = "https://files.pythonhosted.org/packages/17/a2/7034d0d61af8098ec47902108553122baa0f438df8a713be860f7407c9e6/mypy-1.17.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:69e83ea6553a3ba79c08c6e15dbd9bfa912ec1e493bf75489ef93beb65209aeb", size = 11086295, upload-time = "2025-07-31T07:53:28.124Z" }, - { url = "https://files.pythonhosted.org/packages/14/1f/19e7e44b594d4b12f6ba8064dbe136505cec813549ca3e5191e40b1d3cc2/mypy-1.17.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:1b16708a66d38abb1e6b5702f5c2c87e133289da36f6a1d15f6a5221085c6403", size = 10112355, upload-time = "2025-07-31T07:53:21.121Z" }, - { url = "https://files.pythonhosted.org/packages/5b/69/baa33927e29e6b4c55d798a9d44db5d394072eef2bdc18c3e2048c9ed1e9/mypy-1.17.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:89e972c0035e9e05823907ad5398c5a73b9f47a002b22359b177d40bdaee7056", size = 11875285, upload-time = "2025-07-31T07:53:55.293Z" }, - { url = 
"https://files.pythonhosted.org/packages/90/13/f3a89c76b0a41e19490b01e7069713a30949d9a6c147289ee1521bcea245/mypy-1.17.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:03b6d0ed2b188e35ee6d5c36b5580cffd6da23319991c49ab5556c023ccf1341", size = 12737895, upload-time = "2025-07-31T07:53:43.623Z" }, - { url = "https://files.pythonhosted.org/packages/23/a1/c4ee79ac484241301564072e6476c5a5be2590bc2e7bfd28220033d2ef8f/mypy-1.17.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c837b896b37cd103570d776bda106eabb8737aa6dd4f248451aecf53030cdbeb", size = 12931025, upload-time = "2025-07-31T07:54:17.125Z" }, - { url = "https://files.pythonhosted.org/packages/89/b8/7409477be7919a0608900e6320b155c72caab4fef46427c5cc75f85edadd/mypy-1.17.1-cp312-cp312-win_amd64.whl", hash = "sha256:665afab0963a4b39dff7c1fa563cc8b11ecff7910206db4b2e64dd1ba25aed19", size = 9584664, upload-time = "2025-07-31T07:54:12.842Z" }, - { url = "https://files.pythonhosted.org/packages/5b/82/aec2fc9b9b149f372850291827537a508d6c4d3664b1750a324b91f71355/mypy-1.17.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:93378d3203a5c0800c6b6d850ad2f19f7a3cdf1a3701d3416dbf128805c6a6a7", size = 11075338, upload-time = "2025-07-31T07:53:38.873Z" }, - { url = "https://files.pythonhosted.org/packages/07/ac/ee93fbde9d2242657128af8c86f5d917cd2887584cf948a8e3663d0cd737/mypy-1.17.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:15d54056f7fe7a826d897789f53dd6377ec2ea8ba6f776dc83c2902b899fee81", size = 10113066, upload-time = "2025-07-31T07:54:14.707Z" }, - { url = "https://files.pythonhosted.org/packages/5a/68/946a1e0be93f17f7caa56c45844ec691ca153ee8b62f21eddda336a2d203/mypy-1.17.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:209a58fed9987eccc20f2ca94afe7257a8f46eb5df1fb69958650973230f91e6", size = 11875473, upload-time = "2025-07-31T07:53:14.504Z" }, - { url = "https://files.pythonhosted.org/packages/9f/0f/478b4dce1cb4f43cf0f0d00fba3030b21ca04a01b74d1cd272a528cf446f/mypy-1.17.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:099b9a5da47de9e2cb5165e581f158e854d9e19d2e96b6698c0d64de911dd849", size = 12744296, upload-time = "2025-07-31T07:53:03.896Z" }, - { url = "https://files.pythonhosted.org/packages/ca/70/afa5850176379d1b303f992a828de95fc14487429a7139a4e0bdd17a8279/mypy-1.17.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:fa6ffadfbe6994d724c5a1bb6123a7d27dd68fc9c059561cd33b664a79578e14", size = 12914657, upload-time = "2025-07-31T07:54:08.576Z" }, - { url = "https://files.pythonhosted.org/packages/53/f9/4a83e1c856a3d9c8f6edaa4749a4864ee98486e9b9dbfbc93842891029c2/mypy-1.17.1-cp313-cp313-win_amd64.whl", hash = "sha256:9a2b7d9180aed171f033c9f2fc6c204c1245cf60b0cb61cf2e7acc24eea78e0a", size = 9593320, upload-time = "2025-07-31T07:53:01.341Z" }, - { url = "https://files.pythonhosted.org/packages/1d/f3/8fcd2af0f5b806f6cf463efaffd3c9548a28f84220493ecd38d127b6b66d/mypy-1.17.1-py3-none-any.whl", hash = "sha256:a9f52c0351c21fe24c21d8c0eb1f62967b262d6729393397b6f443c3b773c3b9", size = 2283411, upload-time = "2025-07-31T07:53:24.664Z" }, + { url = "https://files.pythonhosted.org/packages/03/6f/657961a0743cff32e6c0611b63ff1c1970a0b482ace35b069203bf705187/mypy-1.18.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c1eab0cf6294dafe397c261a75f96dc2c31bffe3b944faa24db5def4e2b0f77c", size = 12807973, upload-time = "2025-09-19T00:10:35.282Z" }, + { url = 
"https://files.pythonhosted.org/packages/10/e9/420822d4f661f13ca8900f5fa239b40ee3be8b62b32f3357df9a3045a08b/mypy-1.18.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:7a780ca61fc239e4865968ebc5240bb3bf610ef59ac398de9a7421b54e4a207e", size = 11896527, upload-time = "2025-09-19T00:10:55.791Z" }, + { url = "https://files.pythonhosted.org/packages/aa/73/a05b2bbaa7005f4642fcfe40fb73f2b4fb6bb44229bd585b5878e9a87ef8/mypy-1.18.2-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:448acd386266989ef11662ce3c8011fd2a7b632e0ec7d61a98edd8e27472225b", size = 12507004, upload-time = "2025-09-19T00:11:05.411Z" }, + { url = "https://files.pythonhosted.org/packages/4f/01/f6e4b9f0d031c11ccbd6f17da26564f3a0f3c4155af344006434b0a05a9d/mypy-1.18.2-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:f9e171c465ad3901dc652643ee4bffa8e9fef4d7d0eece23b428908c77a76a66", size = 13245947, upload-time = "2025-09-19T00:10:46.923Z" }, + { url = "https://files.pythonhosted.org/packages/d7/97/19727e7499bfa1ae0773d06afd30ac66a58ed7437d940c70548634b24185/mypy-1.18.2-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:592ec214750bc00741af1f80cbf96b5013d81486b7bb24cb052382c19e40b428", size = 13499217, upload-time = "2025-09-19T00:09:39.472Z" }, + { url = "https://files.pythonhosted.org/packages/9f/4f/90dc8c15c1441bf31cf0f9918bb077e452618708199e530f4cbd5cede6ff/mypy-1.18.2-cp310-cp310-win_amd64.whl", hash = "sha256:7fb95f97199ea11769ebe3638c29b550b5221e997c63b14ef93d2e971606ebed", size = 9766753, upload-time = "2025-09-19T00:10:49.161Z" }, + { url = "https://files.pythonhosted.org/packages/88/87/cafd3ae563f88f94eec33f35ff722d043e09832ea8530ef149ec1efbaf08/mypy-1.18.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:807d9315ab9d464125aa9fcf6d84fde6e1dc67da0b6f80e7405506b8ac72bc7f", size = 12731198, upload-time = "2025-09-19T00:09:44.857Z" }, + { url = "https://files.pythonhosted.org/packages/0f/e0/1e96c3d4266a06d4b0197ace5356d67d937d8358e2ee3ffac71faa843724/mypy-1.18.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:776bb00de1778caf4db739c6e83919c1d85a448f71979b6a0edd774ea8399341", size = 11817879, upload-time = "2025-09-19T00:09:47.131Z" }, + { url = "https://files.pythonhosted.org/packages/72/ef/0c9ba89eb03453e76bdac5a78b08260a848c7bfc5d6603634774d9cd9525/mypy-1.18.2-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:1379451880512ffce14505493bd9fe469e0697543717298242574882cf8cdb8d", size = 12427292, upload-time = "2025-09-19T00:10:22.472Z" }, + { url = "https://files.pythonhosted.org/packages/1a/52/ec4a061dd599eb8179d5411d99775bec2a20542505988f40fc2fee781068/mypy-1.18.2-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1331eb7fd110d60c24999893320967594ff84c38ac6d19e0a76c5fd809a84c86", size = 13163750, upload-time = "2025-09-19T00:09:51.472Z" }, + { url = "https://files.pythonhosted.org/packages/c4/5f/2cf2ceb3b36372d51568f2208c021870fe7834cf3186b653ac6446511839/mypy-1.18.2-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:3ca30b50a51e7ba93b00422e486cbb124f1c56a535e20eff7b2d6ab72b3b2e37", size = 13351827, upload-time = "2025-09-19T00:09:58.311Z" }, + { url = "https://files.pythonhosted.org/packages/c8/7d/2697b930179e7277529eaaec1513f8de622818696857f689e4a5432e5e27/mypy-1.18.2-cp311-cp311-win_amd64.whl", hash = "sha256:664dc726e67fa54e14536f6e1224bcfce1d9e5ac02426d2326e2bb4e081d1ce8", size = 9757983, upload-time = 
"2025-09-19T00:10:09.071Z" }, + { url = "https://files.pythonhosted.org/packages/07/06/dfdd2bc60c66611dd8335f463818514733bc763e4760dee289dcc33df709/mypy-1.18.2-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:33eca32dd124b29400c31d7cf784e795b050ace0e1f91b8dc035672725617e34", size = 12908273, upload-time = "2025-09-19T00:10:58.321Z" }, + { url = "https://files.pythonhosted.org/packages/81/14/6a9de6d13a122d5608e1a04130724caf9170333ac5a924e10f670687d3eb/mypy-1.18.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a3c47adf30d65e89b2dcd2fa32f3aeb5e94ca970d2c15fcb25e297871c8e4764", size = 11920910, upload-time = "2025-09-19T00:10:20.043Z" }, + { url = "https://files.pythonhosted.org/packages/5f/a9/b29de53e42f18e8cc547e38daa9dfa132ffdc64f7250e353f5c8cdd44bee/mypy-1.18.2-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:5d6c838e831a062f5f29d11c9057c6009f60cb294fea33a98422688181fe2893", size = 12465585, upload-time = "2025-09-19T00:10:33.005Z" }, + { url = "https://files.pythonhosted.org/packages/77/ae/6c3d2c7c61ff21f2bee938c917616c92ebf852f015fb55917fd6e2811db2/mypy-1.18.2-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:01199871b6110a2ce984bde85acd481232d17413868c9807e95c1b0739a58914", size = 13348562, upload-time = "2025-09-19T00:10:11.51Z" }, + { url = "https://files.pythonhosted.org/packages/4d/31/aec68ab3b4aebdf8f36d191b0685d99faa899ab990753ca0fee60fb99511/mypy-1.18.2-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:a2afc0fa0b0e91b4599ddfe0f91e2c26c2b5a5ab263737e998d6817874c5f7c8", size = 13533296, upload-time = "2025-09-19T00:10:06.568Z" }, + { url = "https://files.pythonhosted.org/packages/9f/83/abcb3ad9478fca3ebeb6a5358bb0b22c95ea42b43b7789c7fb1297ca44f4/mypy-1.18.2-cp312-cp312-win_amd64.whl", hash = "sha256:d8068d0afe682c7c4897c0f7ce84ea77f6de953262b12d07038f4d296d547074", size = 9828828, upload-time = "2025-09-19T00:10:28.203Z" }, + { url = "https://files.pythonhosted.org/packages/5f/04/7f462e6fbba87a72bc8097b93f6842499c428a6ff0c81dd46948d175afe8/mypy-1.18.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:07b8b0f580ca6d289e69209ec9d3911b4a26e5abfde32228a288eb79df129fcc", size = 12898728, upload-time = "2025-09-19T00:10:01.33Z" }, + { url = "https://files.pythonhosted.org/packages/99/5b/61ed4efb64f1871b41fd0b82d29a64640f3516078f6c7905b68ab1ad8b13/mypy-1.18.2-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:ed4482847168439651d3feee5833ccedbf6657e964572706a2adb1f7fa4dfe2e", size = 11910758, upload-time = "2025-09-19T00:10:42.607Z" }, + { url = "https://files.pythonhosted.org/packages/3c/46/d297d4b683cc89a6e4108c4250a6a6b717f5fa96e1a30a7944a6da44da35/mypy-1.18.2-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:c3ad2afadd1e9fea5cf99a45a822346971ede8685cc581ed9cd4d42eaf940986", size = 12475342, upload-time = "2025-09-19T00:11:00.371Z" }, + { url = "https://files.pythonhosted.org/packages/83/45/4798f4d00df13eae3bfdf726c9244bcb495ab5bd588c0eed93a2f2dd67f3/mypy-1.18.2-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:a431a6f1ef14cf8c144c6b14793a23ec4eae3db28277c358136e79d7d062f62d", size = 13338709, upload-time = "2025-09-19T00:11:03.358Z" }, + { url = "https://files.pythonhosted.org/packages/d7/09/479f7358d9625172521a87a9271ddd2441e1dab16a09708f056e97007207/mypy-1.18.2-cp313-cp313-musllinux_1_2_x86_64.whl", hash = 
"sha256:7ab28cc197f1dd77a67e1c6f35cd1f8e8b73ed2217e4fc005f9e6a504e46e7ba", size = 13529806, upload-time = "2025-09-19T00:10:26.073Z" }, + { url = "https://files.pythonhosted.org/packages/71/cf/ac0f2c7e9d0ea3c75cd99dff7aec1c9df4a1376537cb90e4c882267ee7e9/mypy-1.18.2-cp313-cp313-win_amd64.whl", hash = "sha256:0e2785a84b34a72ba55fb5daf079a1003a34c05b22238da94fcae2bbe46f3544", size = 9833262, upload-time = "2025-09-19T00:10:40.035Z" }, + { url = "https://files.pythonhosted.org/packages/87/e3/be76d87158ebafa0309946c4a73831974d4d6ab4f4ef40c3b53a385a66fd/mypy-1.18.2-py3-none-any.whl", hash = "sha256:22a1748707dd62b58d2ae53562ffc4d7f8bcc727e8ac7cbc69c053ddc874d47e", size = 2352367, upload-time = "2025-09-19T00:10:15.489Z" }, ] [[package]] @@ -5503,6 +5319,61 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/50/1b/6921afe68c74868b4c9fa424dad3be35b095e16687989ebbb50ce4fceb7c/psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553", size = 244885, upload-time = "2025-02-13T21:54:37.486Z" }, ] +[[package]] +name = "psycopg2-binary" +version = "2.9.10" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cb/0e/bdc8274dc0585090b4e3432267d7be4dfbfd8971c0fa59167c711105a6bf/psycopg2-binary-2.9.10.tar.gz", hash = "sha256:4b3df0e6990aa98acda57d983942eff13d824135fe2250e6522edaa782a06de2", size = 385764, upload-time = "2024-10-16T11:24:58.126Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7a/81/331257dbf2801cdb82105306042f7a1637cc752f65f2bb688188e0de5f0b/psycopg2_binary-2.9.10-cp310-cp310-macosx_12_0_x86_64.whl", hash = "sha256:0ea8e3d0ae83564f2fc554955d327fa081d065c8ca5cc6d2abb643e2c9c1200f", size = 3043397, upload-time = "2024-10-16T11:18:58.647Z" }, + { url = "https://files.pythonhosted.org/packages/e7/9a/7f4f2f031010bbfe6a02b4a15c01e12eb6b9b7b358ab33229f28baadbfc1/psycopg2_binary-2.9.10-cp310-cp310-macosx_14_0_arm64.whl", hash = "sha256:3e9c76f0ac6f92ecfc79516a8034a544926430f7b080ec5a0537bca389ee0906", size = 3274806, upload-time = "2024-10-16T11:19:03.935Z" }, + { url = "https://files.pythonhosted.org/packages/e5/57/8ddd4b374fa811a0b0a0f49b6abad1cde9cb34df73ea3348cc283fcd70b4/psycopg2_binary-2.9.10-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2ad26b467a405c798aaa1458ba09d7e2b6e5f96b1ce0ac15d82fd9f95dc38a92", size = 2851361, upload-time = "2024-10-16T11:19:07.277Z" }, + { url = "https://files.pythonhosted.org/packages/f9/66/d1e52c20d283f1f3a8e7e5c1e06851d432f123ef57b13043b4f9b21ffa1f/psycopg2_binary-2.9.10-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:270934a475a0e4b6925b5f804e3809dd5f90f8613621d062848dd82f9cd62007", size = 3080836, upload-time = "2024-10-16T11:19:11.033Z" }, + { url = "https://files.pythonhosted.org/packages/a0/cb/592d44a9546aba78f8a1249021fe7c59d3afb8a0ba51434d6610cc3462b6/psycopg2_binary-2.9.10-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:48b338f08d93e7be4ab2b5f1dbe69dc5e9ef07170fe1f86514422076d9c010d0", size = 3264552, upload-time = "2024-10-16T11:19:14.606Z" }, + { url = "https://files.pythonhosted.org/packages/64/33/c8548560b94b7617f203d7236d6cdf36fe1a5a3645600ada6efd79da946f/psycopg2_binary-2.9.10-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7f4152f8f76d2023aac16285576a9ecd2b11a9895373a1f10fd9db54b3ff06b4", size = 3019789, upload-time = "2024-10-16T11:19:18.889Z" }, + { url = 
"https://files.pythonhosted.org/packages/b0/0e/c2da0db5bea88a3be52307f88b75eec72c4de62814cbe9ee600c29c06334/psycopg2_binary-2.9.10-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:32581b3020c72d7a421009ee1c6bf4a131ef5f0a968fab2e2de0c9d2bb4577f1", size = 2871776, upload-time = "2024-10-16T11:19:23.023Z" }, + { url = "https://files.pythonhosted.org/packages/15/d7/774afa1eadb787ddf41aab52d4c62785563e29949613c958955031408ae6/psycopg2_binary-2.9.10-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:2ce3e21dc3437b1d960521eca599d57408a695a0d3c26797ea0f72e834c7ffe5", size = 2820959, upload-time = "2024-10-16T11:19:26.906Z" }, + { url = "https://files.pythonhosted.org/packages/5e/ed/440dc3f5991a8c6172a1cde44850ead0e483a375277a1aef7cfcec00af07/psycopg2_binary-2.9.10-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:e984839e75e0b60cfe75e351db53d6db750b00de45644c5d1f7ee5d1f34a1ce5", size = 2919329, upload-time = "2024-10-16T11:19:30.027Z" }, + { url = "https://files.pythonhosted.org/packages/03/be/2cc8f4282898306732d2ae7b7378ae14e8df3c1231b53579efa056aae887/psycopg2_binary-2.9.10-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:3c4745a90b78e51d9ba06e2088a2fe0c693ae19cc8cb051ccda44e8df8a6eb53", size = 2957659, upload-time = "2024-10-16T11:19:32.864Z" }, + { url = "https://files.pythonhosted.org/packages/d0/12/fb8e4f485d98c570e00dad5800e9a2349cfe0f71a767c856857160d343a5/psycopg2_binary-2.9.10-cp310-cp310-win32.whl", hash = "sha256:e5720a5d25e3b99cd0dc5c8a440570469ff82659bb09431c1439b92caf184d3b", size = 1024605, upload-time = "2024-10-16T11:19:35.462Z" }, + { url = "https://files.pythonhosted.org/packages/22/4f/217cd2471ecf45d82905dd09085e049af8de6cfdc008b6663c3226dc1c98/psycopg2_binary-2.9.10-cp310-cp310-win_amd64.whl", hash = "sha256:3c18f74eb4386bf35e92ab2354a12c17e5eb4d9798e4c0ad3a00783eae7cd9f1", size = 1163817, upload-time = "2024-10-16T11:19:37.384Z" }, + { url = "https://files.pythonhosted.org/packages/9c/8f/9feb01291d0d7a0a4c6a6bab24094135c2b59c6a81943752f632c75896d6/psycopg2_binary-2.9.10-cp311-cp311-macosx_12_0_x86_64.whl", hash = "sha256:04392983d0bb89a8717772a193cfaac58871321e3ec69514e1c4e0d4957b5aff", size = 3043397, upload-time = "2024-10-16T11:19:40.033Z" }, + { url = "https://files.pythonhosted.org/packages/15/30/346e4683532011561cd9c8dfeac6a8153dd96452fee0b12666058ab7893c/psycopg2_binary-2.9.10-cp311-cp311-macosx_14_0_arm64.whl", hash = "sha256:1a6784f0ce3fec4edc64e985865c17778514325074adf5ad8f80636cd029ef7c", size = 3274806, upload-time = "2024-10-16T11:19:43.5Z" }, + { url = "https://files.pythonhosted.org/packages/66/6e/4efebe76f76aee7ec99166b6c023ff8abdc4e183f7b70913d7c047701b79/psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b5f86c56eeb91dc3135b3fd8a95dc7ae14c538a2f3ad77a19645cf55bab1799c", size = 2851370, upload-time = "2024-10-16T11:19:46.986Z" }, + { url = "https://files.pythonhosted.org/packages/7f/fd/ff83313f86b50f7ca089b161b8e0a22bb3c319974096093cd50680433fdb/psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:2b3d2491d4d78b6b14f76881905c7a8a8abcf974aad4a8a0b065273a0ed7a2cb", size = 3080780, upload-time = "2024-10-16T11:19:50.242Z" }, + { url = "https://files.pythonhosted.org/packages/e6/c4/bfadd202dcda8333a7ccafdc51c541dbdfce7c2c7cda89fa2374455d795f/psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2286791ececda3a723d1910441c793be44625d86d1a4e79942751197f4d30341", size = 3264583, upload-time = 
"2024-10-16T11:19:54.424Z" }, + { url = "https://files.pythonhosted.org/packages/5d/f1/09f45ac25e704ac954862581f9f9ae21303cc5ded3d0b775532b407f0e90/psycopg2_binary-2.9.10-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:512d29bb12608891e349af6a0cccedce51677725a921c07dba6342beaf576f9a", size = 3019831, upload-time = "2024-10-16T11:19:57.762Z" }, + { url = "https://files.pythonhosted.org/packages/9e/2e/9beaea078095cc558f215e38f647c7114987d9febfc25cb2beed7c3582a5/psycopg2_binary-2.9.10-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:5a507320c58903967ef7384355a4da7ff3f28132d679aeb23572753cbf2ec10b", size = 2871822, upload-time = "2024-10-16T11:20:04.693Z" }, + { url = "https://files.pythonhosted.org/packages/01/9e/ef93c5d93f3dc9fc92786ffab39e323b9aed066ba59fdc34cf85e2722271/psycopg2_binary-2.9.10-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:6d4fa1079cab9018f4d0bd2db307beaa612b0d13ba73b5c6304b9fe2fb441ff7", size = 2820975, upload-time = "2024-10-16T11:20:11.401Z" }, + { url = "https://files.pythonhosted.org/packages/a5/f0/049e9631e3268fe4c5a387f6fc27e267ebe199acf1bc1bc9cbde4bd6916c/psycopg2_binary-2.9.10-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:851485a42dbb0bdc1edcdabdb8557c09c9655dfa2ca0460ff210522e073e319e", size = 2919320, upload-time = "2024-10-16T11:20:17.959Z" }, + { url = "https://files.pythonhosted.org/packages/dc/9a/bcb8773b88e45fb5a5ea8339e2104d82c863a3b8558fbb2aadfe66df86b3/psycopg2_binary-2.9.10-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:35958ec9e46432d9076286dda67942ed6d968b9c3a6a2fd62b48939d1d78bf68", size = 2957617, upload-time = "2024-10-16T11:20:24.711Z" }, + { url = "https://files.pythonhosted.org/packages/e2/6b/144336a9bf08a67d217b3af3246abb1d027095dab726f0687f01f43e8c03/psycopg2_binary-2.9.10-cp311-cp311-win32.whl", hash = "sha256:ecced182e935529727401b24d76634a357c71c9275b356efafd8a2a91ec07392", size = 1024618, upload-time = "2024-10-16T11:20:27.718Z" }, + { url = "https://files.pythonhosted.org/packages/61/69/3b3d7bd583c6d3cbe5100802efa5beacaacc86e37b653fc708bf3d6853b8/psycopg2_binary-2.9.10-cp311-cp311-win_amd64.whl", hash = "sha256:ee0e8c683a7ff25d23b55b11161c2663d4b099770f6085ff0a20d4505778d6b4", size = 1163816, upload-time = "2024-10-16T11:20:30.777Z" }, + { url = "https://files.pythonhosted.org/packages/49/7d/465cc9795cf76f6d329efdafca74693714556ea3891813701ac1fee87545/psycopg2_binary-2.9.10-cp312-cp312-macosx_12_0_x86_64.whl", hash = "sha256:880845dfe1f85d9d5f7c412efea7a08946a46894537e4e5d091732eb1d34d9a0", size = 3044771, upload-time = "2024-10-16T11:20:35.234Z" }, + { url = "https://files.pythonhosted.org/packages/8b/31/6d225b7b641a1a2148e3ed65e1aa74fc86ba3fee850545e27be9e1de893d/psycopg2_binary-2.9.10-cp312-cp312-macosx_14_0_arm64.whl", hash = "sha256:9440fa522a79356aaa482aa4ba500b65f28e5d0e63b801abf6aa152a29bd842a", size = 3275336, upload-time = "2024-10-16T11:20:38.742Z" }, + { url = "https://files.pythonhosted.org/packages/30/b7/a68c2b4bff1cbb1728e3ec864b2d92327c77ad52edcd27922535a8366f68/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e3923c1d9870c49a2d44f795df0c889a22380d36ef92440ff618ec315757e539", size = 2851637, upload-time = "2024-10-16T11:20:42.145Z" }, + { url = "https://files.pythonhosted.org/packages/0b/b1/cfedc0e0e6f9ad61f8657fd173b2f831ce261c02a08c0b09c652b127d813/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7b2c956c028ea5de47ff3a8d6b3cc3330ab45cf0b7c3da35a2d6ff8420896526", size = 
3082097, upload-time = "2024-10-16T11:20:46.185Z" }, + { url = "https://files.pythonhosted.org/packages/18/ed/0a8e4153c9b769f59c02fb5e7914f20f0b2483a19dae7bf2db54b743d0d0/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f758ed67cab30b9a8d2833609513ce4d3bd027641673d4ebc9c067e4d208eec1", size = 3264776, upload-time = "2024-10-16T11:20:50.879Z" }, + { url = "https://files.pythonhosted.org/packages/10/db/d09da68c6a0cdab41566b74e0a6068a425f077169bed0946559b7348ebe9/psycopg2_binary-2.9.10-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8cd9b4f2cfab88ed4a9106192de509464b75a906462fb846b936eabe45c2063e", size = 3020968, upload-time = "2024-10-16T11:20:56.819Z" }, + { url = "https://files.pythonhosted.org/packages/94/28/4d6f8c255f0dfffb410db2b3f9ac5218d959a66c715c34cac31081e19b95/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dc08420625b5a20b53551c50deae6e231e6371194fa0651dbe0fb206452ae1f", size = 2872334, upload-time = "2024-10-16T11:21:02.411Z" }, + { url = "https://files.pythonhosted.org/packages/05/f7/20d7bf796593c4fea95e12119d6cc384ff1f6141a24fbb7df5a668d29d29/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:d7cd730dfa7c36dbe8724426bf5612798734bff2d3c3857f36f2733f5bfc7c00", size = 2822722, upload-time = "2024-10-16T11:21:09.01Z" }, + { url = "https://files.pythonhosted.org/packages/4d/e4/0c407ae919ef626dbdb32835a03b6737013c3cc7240169843965cada2bdf/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:155e69561d54d02b3c3209545fb08938e27889ff5a10c19de8d23eb5a41be8a5", size = 2920132, upload-time = "2024-10-16T11:21:16.339Z" }, + { url = "https://files.pythonhosted.org/packages/2d/70/aa69c9f69cf09a01da224909ff6ce8b68faeef476f00f7ec377e8f03be70/psycopg2_binary-2.9.10-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:c3cc28a6fd5a4a26224007712e79b81dbaee2ffb90ff406256158ec4d7b52b47", size = 2959312, upload-time = "2024-10-16T11:21:25.584Z" }, + { url = "https://files.pythonhosted.org/packages/d3/bd/213e59854fafe87ba47814bf413ace0dcee33a89c8c8c814faca6bc7cf3c/psycopg2_binary-2.9.10-cp312-cp312-win32.whl", hash = "sha256:ec8a77f521a17506a24a5f626cb2aee7850f9b69a0afe704586f63a464f3cd64", size = 1025191, upload-time = "2024-10-16T11:21:29.912Z" }, + { url = "https://files.pythonhosted.org/packages/92/29/06261ea000e2dc1e22907dbbc483a1093665509ea586b29b8986a0e56733/psycopg2_binary-2.9.10-cp312-cp312-win_amd64.whl", hash = "sha256:18c5ee682b9c6dd3696dad6e54cc7ff3a1a9020df6a5c0f861ef8bfd338c3ca0", size = 1164031, upload-time = "2024-10-16T11:21:34.211Z" }, + { url = "https://files.pythonhosted.org/packages/3e/30/d41d3ba765609c0763505d565c4d12d8f3c79793f0d0f044ff5a28bf395b/psycopg2_binary-2.9.10-cp313-cp313-macosx_12_0_x86_64.whl", hash = "sha256:26540d4a9a4e2b096f1ff9cce51253d0504dca5a85872c7f7be23be5a53eb18d", size = 3044699, upload-time = "2024-10-16T11:21:42.841Z" }, + { url = "https://files.pythonhosted.org/packages/35/44/257ddadec7ef04536ba71af6bc6a75ec05c5343004a7ec93006bee66c0bc/psycopg2_binary-2.9.10-cp313-cp313-macosx_14_0_arm64.whl", hash = "sha256:e217ce4d37667df0bc1c397fdcd8de5e81018ef305aed9415c3b093faaeb10fb", size = 3275245, upload-time = "2024-10-16T11:21:51.989Z" }, + { url = "https://files.pythonhosted.org/packages/1b/11/48ea1cd11de67f9efd7262085588790a95d9dfcd9b8a687d46caf7305c1a/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:245159e7ab20a71d989da00f280ca57da7641fa2cdcf71749c193cea540a74f7", size = 2851631, upload-time = "2024-10-16T11:21:57.584Z" }, + { url = "https://files.pythonhosted.org/packages/62/e0/62ce5ee650e6c86719d621a761fe4bc846ab9eff8c1f12b1ed5741bf1c9b/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3c4ded1a24b20021ebe677b7b08ad10bf09aac197d6943bfe6fec70ac4e4690d", size = 3082140, upload-time = "2024-10-16T11:22:02.005Z" }, + { url = "https://files.pythonhosted.org/packages/27/ce/63f946c098611f7be234c0dd7cb1ad68b0b5744d34f68062bb3c5aa510c8/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:3abb691ff9e57d4a93355f60d4f4c1dd2d68326c968e7db17ea96df3c023ef73", size = 3264762, upload-time = "2024-10-16T11:22:06.412Z" }, + { url = "https://files.pythonhosted.org/packages/43/25/c603cd81402e69edf7daa59b1602bd41eb9859e2824b8c0855d748366ac9/psycopg2_binary-2.9.10-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8608c078134f0b3cbd9f89b34bd60a943b23fd33cc5f065e8d5f840061bd0673", size = 3020967, upload-time = "2024-10-16T11:22:11.583Z" }, + { url = "https://files.pythonhosted.org/packages/5f/d6/8708d8c6fca531057fa170cdde8df870e8b6a9b136e82b361c65e42b841e/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:230eeae2d71594103cd5b93fd29d1ace6420d0b86f4778739cb1a5a32f607d1f", size = 2872326, upload-time = "2024-10-16T11:22:16.406Z" }, + { url = "https://files.pythonhosted.org/packages/ce/ac/5b1ea50fc08a9df82de7e1771537557f07c2632231bbab652c7e22597908/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:bb89f0a835bcfc1d42ccd5f41f04870c1b936d8507c6df12b7737febc40f0909", size = 2822712, upload-time = "2024-10-16T11:22:21.366Z" }, + { url = "https://files.pythonhosted.org/packages/c4/fc/504d4503b2abc4570fac3ca56eb8fed5e437bf9c9ef13f36b6621db8ef00/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_ppc64le.whl", hash = "sha256:f0c2d907a1e102526dd2986df638343388b94c33860ff3bbe1384130828714b1", size = 2920155, upload-time = "2024-10-16T11:22:25.684Z" }, + { url = "https://files.pythonhosted.org/packages/b2/d1/323581e9273ad2c0dbd1902f3fb50c441da86e894b6e25a73c3fda32c57e/psycopg2_binary-2.9.10-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f8157bed2f51db683f31306aa497311b560f2265998122abe1dce6428bd86567", size = 2959356, upload-time = "2024-10-16T11:22:30.562Z" }, + { url = "https://files.pythonhosted.org/packages/08/50/d13ea0a054189ae1bc21af1d85b6f8bb9bbc5572991055d70ad9006fe2d6/psycopg2_binary-2.9.10-cp313-cp313-win_amd64.whl", hash = "sha256:27422aa5f11fbcd9b18da48373eb67081243662f9b46e6fd07c3eb46e4535142", size = 2569224, upload-time = "2025-01-04T20:09:19.234Z" }, +] + [[package]] name = "ptyprocess" version = "0.7.0" @@ -5974,6 +5845,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fa/a7/021c9f65ba92c7ac669a11a8b3c425fe8e8e91c2cc0ead32a6e4024ecdc6/pymongo-4.15.0-cp313-cp313t-win_arm64.whl", hash = "sha256:010297ecaebded4d2d759e118319e3b9bdce9d371d00ca9f5e47a58a546748cc", size = 992287, upload-time = "2025-09-10T16:46:26.629Z" }, ] +[[package]] +name = "pymysql" +version = "1.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/f5/ae/1fe3fcd9f959efa0ebe200b8de88b5a5ce3e767e38c7ac32fb179f16a388/pymysql-1.1.2.tar.gz", hash = "sha256:4961d3e165614ae65014e361811a724e2044ad3ea3739de9903ae7c21f539f03", size = 48258, upload-time = "2025-08-24T12:55:55.146Z" } +wheels = 
[ + { url = "https://files.pythonhosted.org/packages/7c/4c/ad33b92b9864cbde84f259d5df035a6447f91891f5be77788e2a3892bce3/pymysql-1.1.2-py3-none-any.whl", hash = "sha256:e6b1d89711dd51f8f74b1631fe08f039e7d76cf67a42a323d3178f0f25762ed9", size = 45300, upload-time = "2025-08-24T12:55:53.394Z" }, +] + [[package]] name = "pynacl" version = "1.6.0" @@ -6032,14 +5912,14 @@ wheels = [ [[package]] name = "pypdf" -version = "5.7.0" +version = "6.0.0" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "typing-extensions", marker = "python_full_version < '3.11'" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/7b/42/fbc37af367b20fa6c53da81b1780025f6046a0fac8cbf0663a17e743b033/pypdf-5.7.0.tar.gz", hash = "sha256:68c92f2e1aae878bab1150e74447f31ab3848b1c0a6f8becae9f0b1904460b6f", size = 5026120, upload-time = "2025-06-29T08:49:48.305Z" } +sdist = { url = "https://files.pythonhosted.org/packages/20/ac/a300a03c3b34967c050677ccb16e7a4b65607ee5df9d51e8b6d713de4098/pypdf-6.0.0.tar.gz", hash = "sha256:282a99d2cc94a84a3a3159f0d9358c0af53f85b4d28d76ea38b96e9e5ac2a08d", size = 5033827, upload-time = "2025-08-11T14:22:02.352Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/73/9f/78d096ef795a813fa0e1cb9b33fa574b205f2b563d9c1e9366c854cf0364/pypdf-5.7.0-py3-none-any.whl", hash = "sha256:203379453439f5b68b7a1cd43cdf4c5f7a02b84810cefa7f93a47b350aaaba48", size = 305524, upload-time = "2025-06-29T08:49:46.16Z" }, + { url = "https://files.pythonhosted.org/packages/2c/83/2cacc506eb322bb31b747bc06ccb82cc9aa03e19ee9c1245e538e49d52be/pypdf-6.0.0-py3-none-any.whl", hash = "sha256:56ea60100ce9f11fc3eec4f359da15e9aec3821b036c1f06d2b660d35683abb8", size = 310465, upload-time = "2025-08-11T14:22:00.481Z" }, ] [[package]] @@ -6092,14 +5972,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/5a/dc/491b7661614ab97483abf2056be1deee4dc2490ecbf7bff9ab5cdbac86e1/pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6", size = 83178, upload-time = "2024-09-19T02:40:08.598Z" }, ] -[[package]] -name = "pysbd" -version = "0.3.4" -source = { registry = "https://pypi.org/simple" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/48/0a/c99fb7d7e176f8b176ef19704a32e6a9c6aafdf19ef75a187f701fc15801/pysbd-0.3.4-py3-none-any.whl", hash = "sha256:cd838939b7b0b185fcf86b0baf6636667dfb6e474743beeff878e9f42e022953", size = 71082, upload-time = "2021-02-11T16:36:33.351Z" }, -] - [[package]] name = "pysher" version = "1.0.8" @@ -6881,28 +6753,28 @@ wheels = [ [[package]] name = "ruff" -version = "0.12.11" +version = "0.13.1" source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/de/55/16ab6a7d88d93001e1ae4c34cbdcfb376652d761799459ff27c1dc20f6fa/ruff-0.12.11.tar.gz", hash = "sha256:c6b09ae8426a65bbee5425b9d0b82796dbb07cb1af045743c79bfb163001165d", size = 5347103, upload-time = "2025-08-28T13:59:08.87Z" } +sdist = { url = "https://files.pythonhosted.org/packages/ab/33/c8e89216845615d14d2d42ba2bee404e7206a8db782f33400754f3799f05/ruff-0.13.1.tar.gz", hash = "sha256:88074c3849087f153d4bb22e92243ad4c1b366d7055f98726bc19aa08dc12d51", size = 5397987, upload-time = "2025-09-18T19:52:44.33Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/d6/a2/3b3573e474de39a7a475f3fbaf36a25600bfeb238e1a90392799163b64a0/ruff-0.12.11-py3-none-linux_armv6l.whl", hash = "sha256:93fce71e1cac3a8bf9200e63a38ac5c078f3b6baebffb74ba5274fb2ab276065", size = 11979885, upload-time = 
"2025-08-28T13:58:26.654Z" }, - { url = "https://files.pythonhosted.org/packages/76/e4/235ad6d1785a2012d3ded2350fd9bc5c5af8c6f56820e696b0118dfe7d24/ruff-0.12.11-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:b8e33ac7b28c772440afa80cebb972ffd823621ded90404f29e5ab6d1e2d4b93", size = 12742364, upload-time = "2025-08-28T13:58:30.256Z" }, - { url = "https://files.pythonhosted.org/packages/2c/0d/15b72c5fe6b1e402a543aa9d8960e0a7e19dfb079f5b0b424db48b7febab/ruff-0.12.11-py3-none-macosx_11_0_arm64.whl", hash = "sha256:d69fb9d4937aa19adb2e9f058bc4fbfe986c2040acb1a4a9747734834eaa0bfd", size = 11920111, upload-time = "2025-08-28T13:58:33.677Z" }, - { url = "https://files.pythonhosted.org/packages/3e/c0/f66339d7893798ad3e17fa5a1e587d6fd9806f7c1c062b63f8b09dda6702/ruff-0.12.11-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:411954eca8464595077a93e580e2918d0a01a19317af0a72132283e28ae21bee", size = 12160060, upload-time = "2025-08-28T13:58:35.74Z" }, - { url = "https://files.pythonhosted.org/packages/03/69/9870368326db26f20c946205fb2d0008988aea552dbaec35fbacbb46efaa/ruff-0.12.11-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:6a2c0a2e1a450f387bf2c6237c727dd22191ae8c00e448e0672d624b2bbd7fb0", size = 11799848, upload-time = "2025-08-28T13:58:38.051Z" }, - { url = "https://files.pythonhosted.org/packages/25/8c/dd2c7f990e9b3a8a55eee09d4e675027d31727ce33cdb29eab32d025bdc9/ruff-0.12.11-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8ca4c3a7f937725fd2413c0e884b5248a19369ab9bdd850b5781348ba283f644", size = 13536288, upload-time = "2025-08-28T13:58:40.046Z" }, - { url = "https://files.pythonhosted.org/packages/7a/30/d5496fa09aba59b5e01ea76775a4c8897b13055884f56f1c35a4194c2297/ruff-0.12.11-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:4d1df0098124006f6a66ecf3581a7f7e754c4df7644b2e6704cd7ca80ff95211", size = 14490633, upload-time = "2025-08-28T13:58:42.285Z" }, - { url = "https://files.pythonhosted.org/packages/9b/2f/81f998180ad53445d403c386549d6946d0748e536d58fce5b5e173511183/ruff-0.12.11-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5a8dd5f230efc99a24ace3b77e3555d3fbc0343aeed3fc84c8d89e75ab2ff793", size = 13888430, upload-time = "2025-08-28T13:58:44.641Z" }, - { url = "https://files.pythonhosted.org/packages/87/71/23a0d1d5892a377478c61dbbcffe82a3476b050f38b5162171942a029ef3/ruff-0.12.11-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4dc75533039d0ed04cd33fb8ca9ac9620b99672fe7ff1533b6402206901c34ee", size = 12913133, upload-time = "2025-08-28T13:58:47.039Z" }, - { url = "https://files.pythonhosted.org/packages/80/22/3c6cef96627f89b344c933781ed38329bfb87737aa438f15da95907cbfd5/ruff-0.12.11-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4fc58f9266d62c6eccc75261a665f26b4ef64840887fc6cbc552ce5b29f96cc8", size = 13169082, upload-time = "2025-08-28T13:58:49.157Z" }, - { url = "https://files.pythonhosted.org/packages/05/b5/68b3ff96160d8b49e8dd10785ff3186be18fd650d356036a3770386e6c7f/ruff-0.12.11-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:5a0113bd6eafd545146440225fe60b4e9489f59eb5f5f107acd715ba5f0b3d2f", size = 13139490, upload-time = "2025-08-28T13:58:51.593Z" }, - { url = "https://files.pythonhosted.org/packages/59/b9/050a3278ecd558f74f7ee016fbdf10591d50119df8d5f5da45a22c6afafc/ruff-0.12.11-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:0d737b4059d66295c3ea5720e6efc152623bb83fde5444209b69cd33a53e2000", size = 11958928, 
upload-time = "2025-08-28T13:58:53.943Z" }, - { url = "https://files.pythonhosted.org/packages/f9/bc/93be37347db854806904a43b0493af8d6873472dfb4b4b8cbb27786eb651/ruff-0.12.11-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:916fc5defee32dbc1fc1650b576a8fed68f5e8256e2180d4d9855aea43d6aab2", size = 11764513, upload-time = "2025-08-28T13:58:55.976Z" }, - { url = "https://files.pythonhosted.org/packages/7a/a1/1471751e2015a81fd8e166cd311456c11df74c7e8769d4aabfbc7584c7ac/ruff-0.12.11-py3-none-musllinux_1_2_i686.whl", hash = "sha256:c984f07d7adb42d3ded5be894fb4007f30f82c87559438b4879fe7aa08c62b39", size = 12745154, upload-time = "2025-08-28T13:58:58.16Z" }, - { url = "https://files.pythonhosted.org/packages/68/ab/2542b14890d0f4872dd81b7b2a6aed3ac1786fae1ce9b17e11e6df9e31e3/ruff-0.12.11-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:e07fbb89f2e9249f219d88331c833860489b49cdf4b032b8e4432e9b13e8a4b9", size = 13227653, upload-time = "2025-08-28T13:59:00.276Z" }, - { url = "https://files.pythonhosted.org/packages/22/16/2fbfc61047dbfd009c58a28369a693a1484ad15441723be1cd7fe69bb679/ruff-0.12.11-py3-none-win32.whl", hash = "sha256:c792e8f597c9c756e9bcd4d87cf407a00b60af77078c96f7b6366ea2ce9ba9d3", size = 11944270, upload-time = "2025-08-28T13:59:02.347Z" }, - { url = "https://files.pythonhosted.org/packages/08/a5/34276984705bfe069cd383101c45077ee029c3fe3b28225bf67aa35f0647/ruff-0.12.11-py3-none-win_amd64.whl", hash = "sha256:a3283325960307915b6deb3576b96919ee89432ebd9c48771ca12ee8afe4a0fd", size = 13046600, upload-time = "2025-08-28T13:59:04.751Z" }, - { url = "https://files.pythonhosted.org/packages/84/a8/001d4a7c2b37623a3fd7463208267fb906df40ff31db496157549cfd6e72/ruff-0.12.11-py3-none-win_arm64.whl", hash = "sha256:bae4d6e6a2676f8fb0f98b74594a048bae1b944aab17e9f5d504062303c6dbea", size = 12135290, upload-time = "2025-08-28T13:59:06.933Z" }, + { url = "https://files.pythonhosted.org/packages/f3/41/ca37e340938f45cfb8557a97a5c347e718ef34702546b174e5300dbb1f28/ruff-0.13.1-py3-none-linux_armv6l.whl", hash = "sha256:b2abff595cc3cbfa55e509d89439b5a09a6ee3c252d92020bd2de240836cf45b", size = 12304308, upload-time = "2025-09-18T19:51:56.253Z" }, + { url = "https://files.pythonhosted.org/packages/ff/84/ba378ef4129415066c3e1c80d84e539a0d52feb250685091f874804f28af/ruff-0.13.1-py3-none-macosx_10_12_x86_64.whl", hash = "sha256:4ee9f4249bf7f8bb3984c41bfaf6a658162cdb1b22e3103eabc7dd1dc5579334", size = 12937258, upload-time = "2025-09-18T19:52:00.184Z" }, + { url = "https://files.pythonhosted.org/packages/8d/b6/ec5e4559ae0ad955515c176910d6d7c93edcbc0ed1a3195a41179c58431d/ruff-0.13.1-py3-none-macosx_11_0_arm64.whl", hash = "sha256:5c5da4af5f6418c07d75e6f3224e08147441f5d1eac2e6ce10dcce5e616a3bae", size = 12214554, upload-time = "2025-09-18T19:52:02.753Z" }, + { url = "https://files.pythonhosted.org/packages/70/d6/cb3e3b4f03b9b0c4d4d8f06126d34b3394f6b4d764912fe80a1300696ef6/ruff-0.13.1-py3-none-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:80524f84a01355a59a93cef98d804e2137639823bcee2931f5028e71134a954e", size = 12448181, upload-time = "2025-09-18T19:52:05.279Z" }, + { url = "https://files.pythonhosted.org/packages/d2/ea/bf60cb46d7ade706a246cd3fb99e4cfe854efa3dfbe530d049c684da24ff/ruff-0.13.1-py3-none-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ff7f5ce8d7988767dd46a148192a14d0f48d1baea733f055d9064875c7d50389", size = 12104599, upload-time = "2025-09-18T19:52:07.497Z" }, + { url = 
"https://files.pythonhosted.org/packages/2d/3e/05f72f4c3d3a69e65d55a13e1dd1ade76c106d8546e7e54501d31f1dc54a/ruff-0.13.1-py3-none-manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:c55d84715061f8b05469cdc9a446aa6c7294cd4bd55e86a89e572dba14374f8c", size = 13791178, upload-time = "2025-09-18T19:52:10.189Z" }, + { url = "https://files.pythonhosted.org/packages/81/e7/01b1fc403dd45d6cfe600725270ecc6a8f8a48a55bc6521ad820ed3ceaf8/ruff-0.13.1-py3-none-manylinux_2_17_ppc64.manylinux2014_ppc64.whl", hash = "sha256:ac57fed932d90fa1624c946dc67a0a3388d65a7edc7d2d8e4ca7bddaa789b3b0", size = 14814474, upload-time = "2025-09-18T19:52:12.866Z" }, + { url = "https://files.pythonhosted.org/packages/fa/92/d9e183d4ed6185a8df2ce9faa3f22e80e95b5f88d9cc3d86a6d94331da3f/ruff-0.13.1-py3-none-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c366a71d5b4f41f86a008694f7a0d75fe409ec298685ff72dc882f882d532e36", size = 14217531, upload-time = "2025-09-18T19:52:15.245Z" }, + { url = "https://files.pythonhosted.org/packages/3b/4a/6ddb1b11d60888be224d721e01bdd2d81faaf1720592858ab8bac3600466/ruff-0.13.1-py3-none-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f4ea9d1b5ad3e7a83ee8ebb1229c33e5fe771e833d6d3dcfca7b77d95b060d38", size = 13265267, upload-time = "2025-09-18T19:52:17.649Z" }, + { url = "https://files.pythonhosted.org/packages/81/98/3f1d18a8d9ea33ef2ad508f0417fcb182c99b23258ec5e53d15db8289809/ruff-0.13.1-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b0f70202996055b555d3d74b626406476cc692f37b13bac8828acff058c9966a", size = 13243120, upload-time = "2025-09-18T19:52:20.332Z" }, + { url = "https://files.pythonhosted.org/packages/8d/86/b6ce62ce9c12765fa6c65078d1938d2490b2b1d9273d0de384952b43c490/ruff-0.13.1-py3-none-manylinux_2_31_riscv64.whl", hash = "sha256:f8cff7a105dad631085d9505b491db33848007d6b487c3c1979dd8d9b2963783", size = 13443084, upload-time = "2025-09-18T19:52:23.032Z" }, + { url = "https://files.pythonhosted.org/packages/a1/6e/af7943466a41338d04503fb5a81b2fd07251bd272f546622e5b1599a7976/ruff-0.13.1-py3-none-musllinux_1_2_aarch64.whl", hash = "sha256:9761e84255443316a258dd7dfbd9bfb59c756e52237ed42494917b2577697c6a", size = 12295105, upload-time = "2025-09-18T19:52:25.263Z" }, + { url = "https://files.pythonhosted.org/packages/3f/97/0249b9a24f0f3ebd12f007e81c87cec6d311de566885e9309fcbac5b24cc/ruff-0.13.1-py3-none-musllinux_1_2_armv7l.whl", hash = "sha256:3d376a88c3102ef228b102211ef4a6d13df330cb0f5ca56fdac04ccec2a99700", size = 12072284, upload-time = "2025-09-18T19:52:27.478Z" }, + { url = "https://files.pythonhosted.org/packages/f6/85/0b64693b2c99d62ae65236ef74508ba39c3febd01466ef7f354885e5050c/ruff-0.13.1-py3-none-musllinux_1_2_i686.whl", hash = "sha256:cbefd60082b517a82c6ec8836989775ac05f8991715d228b3c1d86ccc7df7dae", size = 12970314, upload-time = "2025-09-18T19:52:30.212Z" }, + { url = "https://files.pythonhosted.org/packages/96/fc/342e9f28179915d28b3747b7654f932ca472afbf7090fc0c4011e802f494/ruff-0.13.1-py3-none-musllinux_1_2_x86_64.whl", hash = "sha256:dd16b9a5a499fe73f3c2ef09a7885cb1d97058614d601809d37c422ed1525317", size = 13422360, upload-time = "2025-09-18T19:52:32.676Z" }, + { url = "https://files.pythonhosted.org/packages/37/54/6177a0dc10bce6f43e392a2192e6018755473283d0cf43cc7e6afc182aea/ruff-0.13.1-py3-none-win32.whl", hash = "sha256:55e9efa692d7cb18580279f1fbb525146adc401f40735edf0aaeabd93099f9a0", size = 12178448, upload-time = "2025-09-18T19:52:35.545Z" }, + { url = 
"https://files.pythonhosted.org/packages/64/51/c6a3a33d9938007b8bdc8ca852ecc8d810a407fb513ab08e34af12dc7c24/ruff-0.13.1-py3-none-win_amd64.whl", hash = "sha256:3a3fb595287ee556de947183489f636b9f76a72f0fa9c028bdcabf5bab2cc5e5", size = 13286458, upload-time = "2025-09-18T19:52:38.198Z" }, + { url = "https://files.pythonhosted.org/packages/fd/04/afc078a12cf68592345b1e2d6ecdff837d286bac023d7a22c54c7a698c5b/ruff-0.13.1-py3-none-win_arm64.whl", hash = "sha256:c0bae9ffd92d54e03c2bf266f466da0a65e145f298ee5b5846ed435f6a00518a", size = 12437893, upload-time = "2025-09-18T19:52:41.283Z" }, ] [[package]] @@ -6946,15 +6818,6 @@ torch = [ { name = "torch" }, ] -[[package]] -name = "schema" -version = "0.7.7" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/d4/01/0ea2e66bad2f13271e93b729c653747614784d3ebde219679e41ccdceecd/schema-0.7.7.tar.gz", hash = "sha256:7da553abd2958a19dc2547c388cde53398b39196175a9be59ea1caf5ab0a1807", size = 44245, upload-time = "2024-05-04T10:56:17.318Z" } -wheels = [ - { url = "https://files.pythonhosted.org/packages/ad/1b/81855a88c6db2b114d5b2e9f96339190d5ee4d1b981d217fa32127bb00e0/schema-0.7.7-py2.py3-none-any.whl", hash = "sha256:5d976a5b50f36e74e2157b47097b60002bd4d42e65425fcc9c9befadb4255dde", size = 18632, upload-time = "2024-05-04T10:56:13.86Z" }, -] - [[package]] name = "scikit-image" version = "0.25.2" @@ -8917,6 +8780,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b4/2d/2345fce04cfd4bee161bf1e7d9cdc702e3e16109021035dbb24db654a622/yarl-1.20.1-py3-none-any.whl", hash = "sha256:83b8eb083fe4683c6115795d9fc1cfaf2cbbefb19b3a1cb68f6527460f483a77", size = 46542, upload-time = "2025-06-10T00:46:07.521Z" }, ] +[[package]] +name = "youtube-transcript-api" +version = "1.2.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "defusedxml" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8f/f8/5e12d3d0c7001c3b3078697b9918241022bdb1ae12715e9debb00a83e16e/youtube_transcript_api-1.2.2.tar.gz", hash = "sha256:5f67cfaff3621d969778817a3d7b2172c16784855f45fcaed4f0529632e2fef4", size = 469634, upload-time = "2025-08-04T12:22:52.158Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/41/92/3d1a580f0efcad926f45876cf6cb92b2c260e84ae75dae5463bbf38f92e7/youtube_transcript_api-1.2.2-py3-none-any.whl", hash = "sha256:feca8c7f7c9d65188ef6377fc0e01cf466e6b68f1b3e648019646ab342f994d2", size = 485047, upload-time = "2025-08-04T12:22:50.836Z" }, +] + [[package]] name = "zipp" version = "3.23.0"