Squashed 'packages/tools/' content from commit 78317b9c

git-subtree-dir: packages/tools git-subtree-split: 78317b9c127f18bd040c1d77e3c0840cdc9a5b38
2026-01-24 15:48:23 +00:00 · 2025-09-12 21:58:02 -04:00
commit e16606672a
303 changed files with 49010 additions and 0 deletions
--- a/crewai_tools/tools/contextualai_parse_tool/README.md
+++ b/crewai_tools/tools/contextualai_parse_tool/README.md
@@ -0,0 +1,68 @@
+# ContextualAIParseTool
+
+## Description
+This tool is designed to integrate Contextual AI's enterprise-grade document parsing capabilities with CrewAI, enabling you to leverage advanced AI-powered document understanding for complex layouts, tables, and figures. Use this tool to extract structured content from your documents using Contextual AI's powerful document parser.
+
+## Installation
+To incorporate this tool into your project, follow the installation instructions below:
+
+```
+pip install 'crewai[tools]' contextual-client
+```
+
+**Note**: You'll need a Contextual AI API key. Sign up at [app.contextual.ai](https://app.contextual.ai) to get your free API key.
+
+## Example
+
+```python
+from crewai_tools import ContextualAIParseTool
+
+tool = ContextualAIParseTool(api_key="your_api_key_here")
+
+result = tool._run(
+    file_path="/path/to/document.pdf",
+    parse_mode="standard",
+    page_range="0-5",
+    output_types=["markdown-per-page"]
+)
+print(result)
+```
+
+The result contains the parsed contents of your document, returned as a JSON string. For example:
+```
+{
+  "file_name": "attention_is_all_you_need.pdf",
+  "status": "completed",
+  "pages": [
+    {
+      "index": 0,
+      "markdown": "Provided proper attribution ..."
+    },
+    {
+      "index": 1,
+      "markdown": "## 1 Introduction ..."
+    },
+    ...
+  ] 
+}
+```
+
+## Parameters
+- `api_key`: Your Contextual AI API key
+- `file_path`: Path to the document to parse
+- `parse_mode`: Parsing mode (default: "standard")
+- `figure_caption_mode`: Figure caption handling (default: "concise")
+- `enable_document_hierarchy`: Enable hierarchy detection (default: True)
+- `page_range`: Pages to parse, e.g. "0-5" (default: None, which parses all pages)
+- `output_types`: Output formats (default: ["markdown-per-page"])
+
+## Key Features
+- **Advanced Document Understanding**: Handles complex PDF layouts, tables, and multi-column documents
+- **Figure and Table Extraction**: Intelligent extraction of figures, charts, and tabular data
+- **Page Range Selection**: Parse specific pages or entire documents
+
+## Use Cases
+- Extract structured content from complex PDFs and research papers
+- Parse financial reports, legal documents, and technical manuals
+- Convert documents to markdown for further processing in RAG pipelines
+
+For more detailed information about Contextual AI's capabilities, visit the [official documentation](https://docs.contextual.ai).
--- a/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py
+++ b/crewai_tools/tools/contextualai_parse_tool/contextual_parse_tool.py
@@ -0,0 +1,92 @@
+from typing import Any, Optional, Type, List
+from crewai.tools import BaseTool
+from pydantic import BaseModel, Field
+
+
+class ContextualAIParseSchema(BaseModel):
+    """Schema for contextual parse tool."""
+    file_path: str = Field(..., description="Path to the document to parse")
+    parse_mode: str = Field(default="standard", description="Parsing mode")
+    figure_caption_mode: str = Field(default="concise", description="Figure caption mode")
+    enable_document_hierarchy: bool = Field(default=True, description="Enable document hierarchy")
+    page_range: Optional[str] = Field(default=None, description="Page range to parse (e.g., '0-5')")
+    output_types: List[str] = Field(default=["markdown-per-page"], description="List of output types")
+
+
+class ContextualAIParseTool(BaseTool):
+    """Tool to parse documents using Contextual AI's parser."""
+    
+    name: str = "Contextual AI Document Parser"
+    description: str = "Parse documents using Contextual AI's advanced document parser"
+    args_schema: Type[BaseModel] = ContextualAIParseSchema
+    
+    api_key: str
+    package_dependencies: List[str] = ["contextual-client"]
+
+    def _run(
+        self, 
+        file_path: str, 
+        parse_mode: str = "standard",
+        figure_caption_mode: str = "concise",
+        enable_document_hierarchy: bool = True,
+        page_range: Optional[str] = None,
+        output_types: List[str] = ["markdown-per-page"]
+    ) -> str:
+        """Parse a document using Contextual AI's parser."""
+        try:
+            import requests
+            import json
+            import os
+            from time import sleep
+
+            if not os.path.exists(file_path):
+                raise FileNotFoundError(f"Document not found: {file_path}")
+
+            base_url = "https://api.contextual.ai/v1"
+            headers = {
+                "accept": "application/json",
+                "authorization": f"Bearer {self.api_key}"
+            }
+
+            # Submit parse job
+            url = f"{base_url}/parse"
+            config = {
+                "parse_mode": parse_mode,
+                "figure_caption_mode": figure_caption_mode,
+                "enable_document_hierarchy": enable_document_hierarchy,
+            }
+
+            if page_range:
+                config["page_range"] = page_range
+
+            with open(file_path, "rb") as fp:
+                file = {"raw_file": fp}
+                result = requests.post(url, headers=headers, data=config, files=file)
+                response = json.loads(result.text)
+                job_id = response['job_id']
+
+            # Monitor job status
+            status_url = f"{base_url}/parse/jobs/{job_id}/status"
+            while True:
+                result = requests.get(status_url, headers=headers)
+                parse_response = json.loads(result.text)['status']
+
+                if parse_response == "completed":
+                    break
+                elif parse_response == "failed":
+                    raise RuntimeError("Document parsing failed")
+
+                sleep(5)
+
+            # Get parse results
+            results_url = f"{base_url}/parse/jobs/{job_id}/results"
+            result = requests.get(
+                results_url,
+                headers=headers,
+                params={"output_types": ",".join(output_types)},
+            )
+
+            return json.dumps(json.loads(result.text), indent=2)
+
+        except Exception as e:
+            return f"Failed to parse document: {str(e)}"