From 97fc44c9301f63b9756a8622cec23804e8f85491 Mon Sep 17 00:00:00 2001 From: Erick Amorim <73451993+ericklima-ca@users.noreply.github.com> Date: Fri, 27 Dec 2024 20:18:25 -0400 Subject: [PATCH 01/11] fix: Change storage initialization to None for KnowledgeStorage (#1804) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix: Change storage initialization to None for KnowledgeStorage * refactor: Change storage field to optional and improve error handling when saving documents --------- Co-authored-by: João Moura --- src/crewai/knowledge/knowledge.py | 4 ++-- src/crewai/knowledge/source/base_file_knowledge_source.py | 7 +++++-- src/crewai/knowledge/source/base_knowledge_source.py | 7 +++++-- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/src/crewai/knowledge/knowledge.py b/src/crewai/knowledge/knowledge.py index f9f55a517..571542994 100644 --- a/src/crewai/knowledge/knowledge.py +++ b/src/crewai/knowledge/knowledge.py @@ -14,13 +14,13 @@ class Knowledge(BaseModel): Knowledge is a collection of sources and setup for the vector store to save and query relevant context. Args: sources: List[BaseKnowledgeSource] = Field(default_factory=list) - storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage) + storage: Optional[KnowledgeStorage] = Field(default=None) embedder_config: Optional[Dict[str, Any]] = None """ sources: List[BaseKnowledgeSource] = Field(default_factory=list) model_config = ConfigDict(arbitrary_types_allowed=True) - storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage) + storage: Optional[KnowledgeStorage] = Field(default=None) embedder_config: Optional[Dict[str, Any]] = None collection_name: Optional[str] = None diff --git a/src/crewai/knowledge/source/base_file_knowledge_source.py b/src/crewai/knowledge/source/base_file_knowledge_source.py index 8cee77e16..5743b1704 100644 --- a/src/crewai/knowledge/source/base_file_knowledge_source.py +++ b/src/crewai/knowledge/source/base_file_knowledge_source.py @@ -22,7 +22,7 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC): default_factory=list, description="The path to the file" ) content: Dict[Path, str] = Field(init=False, default_factory=dict) - storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage) + storage: Optional[KnowledgeStorage] = Field(default=None) safe_file_paths: List[Path] = Field(default_factory=list) @field_validator("file_path", "file_paths", mode="before") @@ -62,7 +62,10 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC): def _save_documents(self): """Save the documents to the storage.""" - self.storage.save(self.chunks) + if self.storage: + self.storage.save(self.chunks) + else: + raise ValueError("No storage found to save documents.") def convert_to_path(self, path: Union[Path, str]) -> Path: """Convert a path to a Path object.""" diff --git a/src/crewai/knowledge/source/base_knowledge_source.py b/src/crewai/knowledge/source/base_knowledge_source.py index 88c3ab360..b558a4b9a 100644 --- a/src/crewai/knowledge/source/base_knowledge_source.py +++ b/src/crewai/knowledge/source/base_knowledge_source.py @@ -16,7 +16,7 @@ class BaseKnowledgeSource(BaseModel, ABC): chunk_embeddings: List[np.ndarray] = Field(default_factory=list) model_config = ConfigDict(arbitrary_types_allowed=True) - storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage) + storage: Optional[KnowledgeStorage] = Field(default=None) metadata: Dict[str, Any] = Field(default_factory=dict) # Currently unused collection_name: Optional[str] = Field(default=None) @@ -46,4 +46,7 @@ class BaseKnowledgeSource(BaseModel, ABC): Save the documents to the storage. This method should be called after the chunks and embeddings are generated. """ - self.storage.save(self.chunks) + if self.storage: + self.storage.save(self.chunks) + else: + raise ValueError("No storage found to save documents.") From 2433819c4f56088f0f8790890887e13c4691a356 Mon Sep 17 00:00:00 2001 From: "devin-ai-integration[bot]" <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Fri, 27 Dec 2024 21:30:39 -0300 Subject: [PATCH 02/11] fix: handle optional storage with null checks (#1808) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: João Moura --- src/crewai/knowledge/knowledge.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/crewai/knowledge/knowledge.py b/src/crewai/knowledge/knowledge.py index 571542994..c964333c8 100644 --- a/src/crewai/knowledge/knowledge.py +++ b/src/crewai/knowledge/knowledge.py @@ -49,8 +49,13 @@ class Knowledge(BaseModel): """ Query across all knowledge sources to find the most relevant information. Returns the top_k most relevant chunks. + + Raises: + ValueError: If storage is not initialized. """ - + if self.storage is None: + raise ValueError("Storage is not initialized.") + results = self.storage.search( query, limit, From 0c2d23dfe0140d9140f5406573687c9468b9c6ae Mon Sep 17 00:00:00 2001 From: "devin-ai-integration[bot]" <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 28 Dec 2024 01:00:58 -0300 Subject: [PATCH 03/11] docs: update README to highlight Flows (#1809) * docs: highlight Flows feature in README Co-Authored-By: Joe Moura * docs: enhance README with LangGraph comparison and flows-crews synergy Co-Authored-By: Joe Moura * docs: replace initial Flow example with advanced Flow+Crew example; enhance LangGraph comparison Co-Authored-By: Joe Moura * docs: incorporate key terms and enhance feature descriptions Co-Authored-By: Joe Moura * docs: refine technical language, enhance feature descriptions, fix string interpolation Co-Authored-By: Joe Moura * docs: update README with performance metrics, feature enhancements, and course links Co-Authored-By: Joe Moura * docs: update LangGraph comparison with paragraph and P.S. section Co-Authored-By: Joe Moura --------- Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: Joe Moura --- README.md | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 145 insertions(+), 11 deletions(-) diff --git a/README.md b/README.md index 5669c71a2..2cca0f019 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ # **CrewAI** -🤖 **CrewAI**: Cutting-edge framework for orchestrating role-playing, autonomous AI agents. By fostering collaborative intelligence, CrewAI empowers agents to work together seamlessly, tackling complex tasks. +🤖 **CrewAI**: Production-grade framework for orchestrating sophisticated AI agent systems. From simple automations to complex real-world applications, CrewAI provides precise control and deep customization. By fostering collaborative intelligence through flexible, production-ready architecture, CrewAI empowers agents to work together seamlessly, tackling complex business challenges with predictable, consistent results.

@@ -22,11 +22,13 @@ - [Why CrewAI?](#why-crewai) - [Getting Started](#getting-started) - [Key Features](#key-features) +- [Understanding Flows and Crews](#understanding-flows-and-crews) - [Examples](#examples) - [Quick Tutorial](#quick-tutorial) - [Write Job Descriptions](#write-job-descriptions) - [Trip Planner](#trip-planner) - [Stock Analysis](#stock-analysis) + - [Using Crews and Flows Together](#using-crews-and-flows-together) - [Connecting Your Crew to a Model](#connecting-your-crew-to-a-model) - [How CrewAI Compares](#how-crewai-compares) - [Contribution](#contribution) @@ -36,10 +38,40 @@ ## Why CrewAI? The power of AI collaboration has too much to offer. -CrewAI is designed to enable AI agents to assume roles, share goals, and operate in a cohesive unit - much like a well-oiled crew. Whether you're building a smart assistant platform, an automated customer service ensemble, or a multi-agent research team, CrewAI provides the backbone for sophisticated multi-agent interactions. +CrewAI is a standalone framework, built from the ground up without dependencies on Langchain or other agent frameworks. It's designed to enable AI agents to assume roles, share goals, and operate in a cohesive unit - much like a well-oiled crew. Whether you're building a smart assistant platform, an automated customer service ensemble, or a multi-agent research team, CrewAI provides the backbone for sophisticated multi-agent interactions. ## Getting Started +### Learning Resources + +Learn CrewAI through our comprehensive courses: +- [Multi AI Agent Systems with CrewAI](https://www.deeplearning.ai/short-courses/multi-ai-agent-systems-with-crewai/) - Master the fundamentals of multi-agent systems +- [Practical Multi AI Agents and Advanced Use Cases](https://www.deeplearning.ai/short-courses/practical-multi-ai-agents-and-advanced-use-cases-with-crewai/) - Deep dive into advanced implementations + +### Understanding Flows and Crews + +CrewAI offers two powerful, complementary approaches that work seamlessly together to build sophisticated AI applications: + +1. **Crews**: Teams of AI agents with true autonomy and agency, working together to accomplish complex tasks through role-based collaboration. Crews enable: + - Natural, autonomous decision-making between agents + - Dynamic task delegation and collaboration + - Specialized roles with defined goals and expertise + - Flexible problem-solving approaches + +2. **Flows**: Production-ready, event-driven workflows that deliver precise control over complex automations. Flows provide: + - Fine-grained control over execution paths for real-world scenarios + - Secure, consistent state management between tasks + - Clean integration of AI agents with production Python code + - Conditional branching for complex business logic + +The true power of CrewAI emerges when combining Crews and Flows. This synergy allows you to: +- Build complex, production-grade applications +- Balance autonomy with precise control +- Handle sophisticated real-world scenarios +- Maintain clean, maintainable code structure + +### Getting Started with Installation + To get started with CrewAI, follow these simple steps: ### 1. Installation @@ -264,13 +296,16 @@ In addition to the sequential process, you can use the hierarchical process, whi ## Key Features -- **Role-Based Agent Design**: Customize agents with specific roles, goals, and tools. -- **Autonomous Inter-Agent Delegation**: Agents can autonomously delegate tasks and inquire amongst themselves, enhancing problem-solving efficiency. -- **Flexible Task Management**: Define tasks with customizable tools and assign them to agents dynamically. -- **Processes Driven**: Currently only supports `sequential` task execution and `hierarchical` processes, but more complex processes like consensual and autonomous are being worked on. -- **Save output as file**: Save the output of individual tasks as a file, so you can use it later. -- **Parse output as Pydantic or Json**: Parse the output of individual tasks as a Pydantic model or as a Json if you want to. -- **Works with Open Source Models**: Run your crew using Open AI or open source models refer to the [Connect CrewAI to LLMs](https://docs.crewai.com/how-to/LLM-Connections/) page for details on configuring your agents' connections to models, even ones running locally! +**Note**: CrewAI is a standalone framework built from the ground up, without dependencies on Langchain or other agent frameworks. + +- **Deep Customization**: Build sophisticated agents with full control over the system - from overriding inner prompts to accessing low-level APIs. Customize roles, goals, tools, and behaviors while maintaining clean abstractions. +- **Autonomous Inter-Agent Delegation**: Agents can autonomously delegate tasks and inquire amongst themselves, enabling complex problem-solving in real-world scenarios. +- **Flexible Task Management**: Define and customize tasks with granular control, from simple operations to complex multi-step processes. +- **Production-Grade Architecture**: Support for both high-level abstractions and low-level customization, with robust error handling and state management. +- **Predictable Results**: Ensure consistent, accurate outputs through programmatic guardrails, agent training capabilities, and flow-based execution control. See our [documentation on guardrails](https://docs.crewai.com/how-to/guardrails/) for implementation details. +- **Model Flexibility**: Run your crew using OpenAI or open source models with production-ready integrations. See [Connect CrewAI to LLMs](https://docs.crewai.com/how-to/LLM-Connections/) for detailed configuration options. +- **Event-Driven Flows**: Build complex, real-world workflows with precise control over execution paths, state management, and conditional logic. +- **Process Orchestration**: Achieve any workflow pattern through flows - from simple sequential and hierarchical processes to complex, custom orchestration patterns with conditional branching and parallel execution. ![CrewAI Mind Map](./docs/crewAI-mindmap.png "CrewAI Mind Map") @@ -305,6 +340,98 @@ You can test different real life examples of AI crews in the [CrewAI-examples re [![Stock Analysis](https://img.youtube.com/vi/e0Uj4yWdaAg/maxresdefault.jpg)](https://www.youtube.com/watch?v=e0Uj4yWdaAg "Stock Analysis") +### Using Crews and Flows Together + +CrewAI's power truly shines when combining Crews with Flows to create sophisticated automation pipelines. Here's how you can orchestrate multiple Crews within a Flow: + +```python +from crewai.flow.flow import Flow, listen, start, router +from crewai import Crew, Agent, Task +from pydantic import BaseModel + +# Define structured state for precise control +class MarketState(BaseModel): + sentiment: str = "neutral" + confidence: float = 0.0 + recommendations: list = [] + +class AdvancedAnalysisFlow(Flow[MarketState]): + @start() + def fetch_market_data(self): + # Demonstrate low-level control with structured state + self.state.sentiment = "analyzing" + return {"sector": "tech", "timeframe": "1W"} # These parameters match the task description template + + @listen(fetch_market_data) + def analyze_with_crew(self, market_data): + # Show crew agency through specialized roles + analyst = Agent( + role="Senior Market Analyst", + goal="Conduct deep market analysis with expert insight", + backstory="You're a veteran analyst known for identifying subtle market patterns" + ) + researcher = Agent( + role="Data Researcher", + goal="Gather and validate supporting market data", + backstory="You excel at finding and correlating multiple data sources" + ) + + analysis_task = Task( + description="Analyze {sector} sector data for the past {timeframe}", + expected_output="Detailed market analysis with confidence score", + agent=analyst + ) + research_task = Task( + description="Find supporting data to validate the analysis", + expected_output="Corroborating evidence and potential contradictions", + agent=researcher + ) + + # Demonstrate crew autonomy + analysis_crew = Crew( + agents=[analyst, researcher], + tasks=[analysis_task, research_task], + process=Process.sequential, + verbose=True + ) + return analysis_crew.kickoff(inputs=market_data) # Pass market_data as named inputs + + @router(analyze_with_crew) + def determine_next_steps(self): + # Show flow control with conditional routing + if self.state.confidence > 0.8: + return "high_confidence" + elif self.state.confidence > 0.5: + return "medium_confidence" + return "low_confidence" + + @listen("high_confidence") + def execute_strategy(self): + # Demonstrate complex decision making + strategy_crew = Crew( + agents=[ + Agent(role="Strategy Expert", + goal="Develop optimal market strategy") + ], + tasks=[ + Task(description="Create detailed strategy based on analysis", + expected_output="Step-by-step action plan") + ] + ) + return strategy_crew.kickoff() + + @listen("medium_confidence", "low_confidence") + def request_additional_analysis(self): + self.state.recommendations.append("Gather more data") + return "Additional analysis required" +``` + +This example demonstrates how to: +1. Use Python code for basic data operations +2. Create and execute Crews as steps in your workflow +3. Use Flow decorators to manage the sequence of operations +4. Implement conditional branching based on Crew results + ## Connecting Your Crew to a Model CrewAI supports using various LLMs through a variety of connection options. By default your agents will use the OpenAI API when querying the model. However, there are several other ways to allow your agents to connect to models. For example, you can configure your agents to use a local model via the Ollama tool. @@ -313,9 +440,13 @@ Please refer to the [Connect CrewAI to LLMs](https://docs.crewai.com/how-to/LLM- ## How CrewAI Compares -**CrewAI's Advantage**: CrewAI is built with production in mind. It offers the flexibility of Autogen's conversational agents and the structured process approach of ChatDev, but without the rigidity. CrewAI's processes are designed to be dynamic and adaptable, fitting seamlessly into both development and production workflows. +**CrewAI's Advantage**: CrewAI combines autonomous agent intelligence with precise workflow control through its unique Crews and Flows architecture. The framework excels at both high-level orchestration and low-level customization, enabling complex, production-grade systems with granular control. -- **Autogen**: While Autogen does good in creating conversational agents capable of working together, it lacks an inherent concept of process. In Autogen, orchestrating agents' interactions requires additional programming, which can become complex and cumbersome as the scale of tasks grows. +- **LangGraph**: While LangGraph provides a foundation for building agent workflows, its approach requires significant boilerplate code and complex state management patterns. The framework's tight coupling with LangChain can limit flexibility when implementing custom agent behaviors or integrating with external systems. + +*P.S. CrewAI demonstrates significant performance advantages over LangGraph, executing 5.76x faster in certain cases like this QA task example ([see comparison](https://github.com/crewAIInc/crewAI-examples/tree/main/Notebooks/CrewAI%20Flows%20%26%20Langgraph/QA%20Agent)) while achieving higher evaluation scores with faster completion times in certain coding tasks, like in this example ([detailed analysis](https://github.com/crewAIInc/crewAI-examples/blob/main/Notebooks/CrewAI%20Flows%20%26%20Langgraph/Coding%20Assistant/coding_assistant_eval.ipynb)).* + +- **Autogen**: While Autogen excels at creating conversational agents capable of working together, it lacks an inherent concept of process. In Autogen, orchestrating agents' interactions requires additional programming, which can become complex and cumbersome as the scale of tasks grows. - **ChatDev**: ChatDev introduced the idea of processes into the realm of AI agents, but its implementation is quite rigid. Customizations in ChatDev are limited and not geared towards production environments, which can hinder scalability and flexibility in real-world applications. @@ -440,5 +571,8 @@ A: CrewAI uses anonymous telemetry to collect usage data for improvement purpose ### Q: Where can I find examples of CrewAI in action? A: You can find various real-life examples in the [CrewAI-examples repository](https://github.com/crewAIInc/crewAI-examples), including trip planners, stock analysis tools, and more. +### Q: What is the difference between Crews and Flows? +A: Crews and Flows serve different but complementary purposes in CrewAI. Crews are teams of AI agents working together to accomplish specific tasks through role-based collaboration, delivering accurate and predictable results. Flows, on the other hand, are event-driven workflows that can orchestrate both Crews and regular Python code, allowing you to build complex automation pipelines with secure state management and conditional execution paths. + ### Q: How can I contribute to CrewAI? A: Contributions are welcome! You can fork the repository, create a new branch for your feature, add your improvement, and send a pull request. Check the Contribution section in the README for more details. From 99fe91586d429708d0487bed4f9856e73c4a6131 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Moura?= Date: Sat, 28 Dec 2024 01:03:33 -0300 Subject: [PATCH 04/11] Update README.md --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 2cca0f019..bf1287d4d 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ - [Getting Started](#getting-started) - [Key Features](#key-features) - [Understanding Flows and Crews](#understanding-flows-and-crews) +- [CrewAI vs LangGraph](#how-crewai-compares) - [Examples](#examples) - [Quick Tutorial](#quick-tutorial) - [Write Job Descriptions](#write-job-descriptions) @@ -31,6 +32,7 @@ - [Using Crews and Flows Together](#using-crews-and-flows-together) - [Connecting Your Crew to a Model](#connecting-your-crew-to-a-model) - [How CrewAI Compares](#how-crewai-compares) +- [Frequently Asked Questions (FAQ)](#frequently-asked-questions-faq) - [Contribution](#contribution) - [Telemetry](#telemetry) - [License](#license) From 86f58c95deece1e7640af1a118df5d3b7bee949e Mon Sep 17 00:00:00 2001 From: "devin-ai-integration[bot]" <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sat, 28 Dec 2024 01:48:51 -0300 Subject: [PATCH 05/11] docs: add agent-specific knowledge documentation and examples (#1811) Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: Joe Moura --- docs/concepts/knowledge.mdx | 52 +++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/docs/concepts/knowledge.mdx b/docs/concepts/knowledge.mdx index 8df6f623f..8a777833e 100644 --- a/docs/concepts/knowledge.mdx +++ b/docs/concepts/knowledge.mdx @@ -171,6 +171,58 @@ crewai reset-memories --knowledge This is useful when you've updated your knowledge sources and want to ensure that the agents are using the most recent information. +## Agent-Specific Knowledge + +While knowledge can be provided at the crew level using `crew.knowledge_sources`, individual agents can also have their own knowledge sources using the `knowledge_sources` parameter: + +```python Code +from crewai import Agent, Task, Crew +from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource + +# Create agent-specific knowledge about a product +product_specs = StringKnowledgeSource( + content="""The XPS 13 laptop features: + - 13.4-inch 4K display + - Intel Core i7 processor + - 16GB RAM + - 512GB SSD storage + - 12-hour battery life""", + metadata={"category": "product_specs"} +) + +# Create a support agent with product knowledge +support_agent = Agent( + role="Technical Support Specialist", + goal="Provide accurate product information and support.", + backstory="You are an expert on our laptop products and specifications.", + knowledge_sources=[product_specs] # Agent-specific knowledge +) + +# Create a task that requires product knowledge +support_task = Task( + description="Answer this customer question: {question}", + agent=support_agent +) + +# Create and run the crew +crew = Crew( + agents=[support_agent], + tasks=[support_task] +) + +# Get answer about the laptop's specifications +result = crew.kickoff( + inputs={"question": "What is the storage capacity of the XPS 13?"} +) +``` + + + Benefits of agent-specific knowledge: + - Give agents specialized information for their roles + - Maintain separation of concerns between agents + - Combine with crew-level knowledge for layered information access + + ## Custom Knowledge Sources CrewAI allows you to create custom knowledge sources for any type of data by extending the `BaseKnowledgeSource` class. Let's create a practical example that fetches and processes space news articles. From a0c322a53552eee27a9a8c752bffaf5928aefa3c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Moura?= Date: Sat, 28 Dec 2024 02:04:00 -0300 Subject: [PATCH 06/11] fixing file paths for knowledge source --- .../source/base_file_knowledge_source.py | 5 ++-- tests/knowledge/knowledge_test.py | 25 +++++++++++++++++++ 2 files changed, 28 insertions(+), 2 deletions(-) diff --git a/src/crewai/knowledge/source/base_file_knowledge_source.py b/src/crewai/knowledge/source/base_file_knowledge_source.py index 5743b1704..ac345b6a6 100644 --- a/src/crewai/knowledge/source/base_file_knowledge_source.py +++ b/src/crewai/knowledge/source/base_file_knowledge_source.py @@ -26,9 +26,10 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC): safe_file_paths: List[Path] = Field(default_factory=list) @field_validator("file_path", "file_paths", mode="before") - def validate_file_path(cls, v, values): + def validate_file_path(cls, v, info): """Validate that at least one of file_path or file_paths is provided.""" - if v is None and ("file_path" not in values or values.get("file_path") is None): + # Single check if both are None, O(1) instead of nested conditions + if v is None and info.data.get("file_path" if info.field_name == "file_paths" else "file_paths") is None: raise ValueError("Either file_path or file_paths must be provided") return v diff --git a/tests/knowledge/knowledge_test.py b/tests/knowledge/knowledge_test.py index 366067587..6704d3031 100644 --- a/tests/knowledge/knowledge_test.py +++ b/tests/knowledge/knowledge_test.py @@ -584,3 +584,28 @@ def test_docling_source_with_local_file(): docling_source = CrewDoclingSource(file_paths=[pdf_path]) assert docling_source.file_paths == [pdf_path] assert docling_source.content is not None + + +def test_file_path_validation(): + """Test file path validation for knowledge sources.""" + current_dir = Path(__file__).parent + pdf_path = current_dir / "crewai_quickstart.pdf" + + # Test valid single file_path + source = PDFKnowledgeSource(file_path=pdf_path) + assert source.safe_file_paths == [pdf_path] + + # Test valid file_paths list + source = PDFKnowledgeSource(file_paths=[pdf_path]) + assert source.safe_file_paths == [pdf_path] + + # Test both file_path and file_paths provided (should use file_paths) + source = PDFKnowledgeSource(file_path=pdf_path, file_paths=[pdf_path]) + assert source.safe_file_paths == [pdf_path] + + # Test neither file_path nor file_paths provided + with pytest.raises( + ValueError, + match="file_path/file_paths must be a Path, str, or a list of these types" + ): + PDFKnowledgeSource() From 73f328860b4a477a6d3736e646783d7493841cb4 Mon Sep 17 00:00:00 2001 From: "devin-ai-integration[bot]" <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sun, 29 Dec 2024 01:57:59 -0300 Subject: [PATCH 07/11] Fix interpolation for output_file in Task (#1803) (#1814) * fix: interpolate output_file attribute from YAML Co-Authored-By: Joe Moura * fix: add security validation for output_file paths Co-Authored-By: Joe Moura * fix: add _original_output_file private attribute to fix type-checker error Co-Authored-By: Joe Moura * fix: update interpolate_only to handle None inputs and remove duplicate attribute Co-Authored-By: Joe Moura * fix: improve output_file validation and error messages Co-Authored-By: Joe Moura * test: add end-to-end tests for output_file functionality Co-Authored-By: Joe Moura --------- Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: Joe Moura --- src/crewai/task.py | 123 ++++++++- .../test_crew_output_file_end_to_end.yaml | 243 ++++++++++++++++++ tests/crew_test.py | 86 ++++++- tests/task_test.py | 65 ++++- 4 files changed, 503 insertions(+), 14 deletions(-) create mode 100644 tests/cassettes/test_crew_output_file_end_to_end.yaml diff --git a/src/crewai/task.py b/src/crewai/task.py index 30ab79c00..a63bde57d 100644 --- a/src/crewai/task.py +++ b/src/crewai/task.py @@ -179,6 +179,7 @@ class Task(BaseModel): _execution_span: Optional[Span] = PrivateAttr(default=None) _original_description: Optional[str] = PrivateAttr(default=None) _original_expected_output: Optional[str] = PrivateAttr(default=None) + _original_output_file: Optional[str] = PrivateAttr(default=None) _thread: Optional[threading.Thread] = PrivateAttr(default=None) _execution_time: Optional[float] = PrivateAttr(default=None) @@ -213,8 +214,46 @@ class Task(BaseModel): @field_validator("output_file") @classmethod - def output_file_validation(cls, value: str) -> str: - """Validate the output file path by removing the / from the beginning of the path.""" + def output_file_validation(cls, value: Optional[str]) -> Optional[str]: + """Validate the output file path. + + Args: + value: The output file path to validate. Can be None or a string. + If the path contains template variables (e.g. {var}), leading slashes are preserved. + For regular paths, leading slashes are stripped. + + Returns: + The validated and potentially modified path, or None if no path was provided. + + Raises: + ValueError: If the path contains invalid characters, path traversal attempts, + or other security concerns. + """ + if value is None: + return None + + # Basic security checks + if ".." in value: + raise ValueError("Path traversal attempts are not allowed in output_file paths") + + # Check for shell expansion first + if value.startswith('~') or value.startswith('$'): + raise ValueError("Shell expansion characters are not allowed in output_file paths") + + # Then check other shell special characters + if any(char in value for char in ['|', '>', '<', '&', ';']): + raise ValueError("Shell special characters are not allowed in output_file paths") + + # Don't strip leading slash if it's a template path with variables + if "{" in value or "}" in value: + # Validate template variable format + template_vars = [part.split("}")[0] for part in value.split("{")[1:]] + for var in template_vars: + if not var.isidentifier(): + raise ValueError(f"Invalid template variable name: {var}") + return value + + # Strip leading slash for regular paths if value.startswith("/"): return value[1:] return value @@ -393,27 +432,89 @@ class Task(BaseModel): tasks_slices = [self.description, output] return "\n".join(tasks_slices) - def interpolate_inputs(self, inputs: Dict[str, Any]) -> None: - """Interpolate inputs into the task description and expected output.""" + def interpolate_inputs(self, inputs: Dict[str, Union[str, int, float]]) -> None: + """Interpolate inputs into the task description, expected output, and output file path. + + Args: + inputs: Dictionary mapping template variables to their values. + Supported value types are strings, integers, and floats. + + Raises: + ValueError: If a required template variable is missing from inputs. + """ if self._original_description is None: self._original_description = self.description if self._original_expected_output is None: self._original_expected_output = self.expected_output + if self.output_file is not None and self._original_output_file is None: + self._original_output_file = self.output_file - if inputs: + if not inputs: + return + + try: self.description = self._original_description.format(**inputs) + except KeyError as e: + raise ValueError(f"Missing required template variable '{e.args[0]}' in description") from e + except ValueError as e: + raise ValueError(f"Error interpolating description: {str(e)}") from e + + try: self.expected_output = self.interpolate_only( input_string=self._original_expected_output, inputs=inputs ) + except (KeyError, ValueError) as e: + raise ValueError(f"Error interpolating expected_output: {str(e)}") from e - def interpolate_only(self, input_string: str, inputs: Dict[str, Any]) -> str: - """Interpolate placeholders (e.g., {key}) in a string while leaving JSON untouched.""" - escaped_string = input_string.replace("{", "{{").replace("}", "}}") + if self.output_file is not None: + try: + self.output_file = self.interpolate_only( + input_string=self._original_output_file, inputs=inputs + ) + except (KeyError, ValueError) as e: + raise ValueError(f"Error interpolating output_file path: {str(e)}") from e - for key in inputs.keys(): - escaped_string = escaped_string.replace(f"{{{{{key}}}}}", f"{{{key}}}") + def interpolate_only(self, input_string: Optional[str], inputs: Dict[str, Union[str, int, float]]) -> str: + """Interpolate placeholders (e.g., {key}) in a string while leaving JSON untouched. + + Args: + input_string: The string containing template variables to interpolate. + Can be None or empty, in which case an empty string is returned. + inputs: Dictionary mapping template variables to their values. + Supported value types are strings, integers, and floats. + If input_string is empty or has no placeholders, inputs can be empty. + + Returns: + The interpolated string with all template variables replaced with their values. + Empty string if input_string is None or empty. + + Raises: + ValueError: If a required template variable is missing from inputs. + KeyError: If a template variable is not found in the inputs dictionary. + """ + if input_string is None or not input_string: + return "" + if "{" not in input_string and "}" not in input_string: + return input_string + if not inputs: + raise ValueError("Inputs dictionary cannot be empty when interpolating variables") - return escaped_string.format(**inputs) + try: + # Validate input types + for key, value in inputs.items(): + if not isinstance(value, (str, int, float)): + raise ValueError(f"Value for key '{key}' must be a string, integer, or float, got {type(value).__name__}") + + escaped_string = input_string.replace("{", "{{").replace("}", "}}") + + for key in inputs.keys(): + escaped_string = escaped_string.replace(f"{{{{{key}}}}}", f"{{{key}}}") + + return escaped_string.format(**inputs) + except KeyError as e: + raise KeyError(f"Template variable '{e.args[0]}' not found in inputs dictionary") from e + except ValueError as e: + raise ValueError(f"Error during string interpolation: {str(e)}") from e def increment_tools_errors(self) -> None: """Increment the tools errors counter.""" diff --git a/tests/cassettes/test_crew_output_file_end_to_end.yaml b/tests/cassettes/test_crew_output_file_end_to_end.yaml new file mode 100644 index 000000000..2cbd6d9c3 --- /dev/null +++ b/tests/cassettes/test_crew_output_file_end_to_end.yaml @@ -0,0 +1,243 @@ +interactions: +- request: + body: !!binary | + CuIcCiQKIgoMc2VydmljZS5uYW1lEhIKEGNyZXdBSS10ZWxlbWV0cnkSuRwKEgoQY3Jld2FpLnRl + bGVtZXRyeRKjBwoQXK7w4+uvyEkrI9D5qyvcJxII5UmQ7hmczdIqDENyZXcgQ3JlYXRlZDABOfxQ + /hs4jBUYQUi3DBw4jBUYShoKDmNyZXdhaV92ZXJzaW9uEggKBjAuODYuMEoaCg5weXRob25fdmVy + c2lvbhIICgYzLjEyLjdKLgoIY3Jld19rZXkSIgogYzk3YjVmZWI1ZDFiNjZiYjU5MDA2YWFhMDFh + MjljZDZKMQoHY3Jld19pZBImCiRkZjY3NGMwYi1hOTc0LTQ3NTAtYjlkMS0yZWQxNjM3MzFiNTZK + HAoMY3Jld19wcm9jZXNzEgwKCnNlcXVlbnRpYWxKEQoLY3Jld19tZW1vcnkSAhAAShoKFGNyZXdf + bnVtYmVyX29mX3Rhc2tzEgIYAUobChVjcmV3X251bWJlcl9vZl9hZ2VudHMSAhgBStECCgtjcmV3 + X2FnZW50cxLBAgq+Alt7ImtleSI6ICIwN2Q5OWI2MzA0MTFkMzVmZDkwNDdhNTMyZDUzZGRhNyIs + ICJpZCI6ICI5MDYwYTQ2Zi02MDY3LTQ1N2MtOGU3ZC04NjAyN2YzY2U5ZDUiLCAicm9sZSI6ICJS + ZXNlYXJjaGVyIiwgInZlcmJvc2U/IjogZmFsc2UsICJtYXhfaXRlciI6IDIwLCAibWF4X3JwbSI6 + IG51bGwsICJmdW5jdGlvbl9jYWxsaW5nX2xsbSI6ICIiLCAibGxtIjogImdwdC00by1taW5pIiwg + ImRlbGVnYXRpb25fZW5hYmxlZD8iOiBmYWxzZSwgImFsbG93X2NvZGVfZXhlY3V0aW9uPyI6IGZh + bHNlLCAibWF4X3JldHJ5X2xpbWl0IjogMiwgInRvb2xzX25hbWVzIjogW119XUr/AQoKY3Jld190 + YXNrcxLwAQrtAVt7ImtleSI6ICI2Mzk5NjUxN2YzZjNmMWM5NGQ2YmI2MTdhYTBiMWM0ZiIsICJp + ZCI6ICJjYTA4ZjkyOS0yMmI0LTQyZmQtYjViMC05N2M3MjM0ZDk5OTEiLCAiYXN5bmNfZXhlY3V0 + aW9uPyI6IGZhbHNlLCAiaHVtYW5faW5wdXQ/IjogZmFsc2UsICJhZ2VudF9yb2xlIjogIlJlc2Vh + cmNoZXIiLCAiYWdlbnRfa2V5IjogIjA3ZDk5YjYzMDQxMWQzNWZkOTA0N2E1MzJkNTNkZGE3Iiwg + InRvb2xzX25hbWVzIjogW119XXoCGAGFAQABAAASjgIKEOTJZh9R45IwgGVg9cinZmISCJopKRMf + bpMJKgxUYXNrIENyZWF0ZWQwATlG+zQcOIwVGEHk0zUcOIwVGEouCghjcmV3X2tleRIiCiBjOTdi + NWZlYjVkMWI2NmJiNTkwMDZhYWEwMWEyOWNkNkoxCgdjcmV3X2lkEiYKJGRmNjc0YzBiLWE5NzQt + NDc1MC1iOWQxLTJlZDE2MzczMWI1NkouCgh0YXNrX2tleRIiCiA2Mzk5NjUxN2YzZjNmMWM5NGQ2 + YmI2MTdhYTBiMWM0ZkoxCgd0YXNrX2lkEiYKJGNhMDhmOTI5LTIyYjQtNDJmZC1iNWIwLTk3Yzcy + MzRkOTk5MXoCGAGFAQABAAASowcKEEvwrN8+tNMIBwtnA+ip7jASCI78Hrh2wlsBKgxDcmV3IENy + ZWF0ZWQwATkcRqYeOIwVGEE8erQeOIwVGEoaCg5jcmV3YWlfdmVyc2lvbhIICgYwLjg2LjBKGgoO + cHl0aG9uX3ZlcnNpb24SCAoGMy4xMi43Si4KCGNyZXdfa2V5EiIKIDhjMjc1MmY0OWU1YjlkMmI2 + OGNiMzVjYWM4ZmNjODZkSjEKB2NyZXdfaWQSJgokZmRkYzA4ZTMtNDUyNi00N2Q2LThlNWMtNjY0 + YzIyMjc4ZDgyShwKDGNyZXdfcHJvY2VzcxIMCgpzZXF1ZW50aWFsShEKC2NyZXdfbWVtb3J5EgIQ + AEoaChRjcmV3X251bWJlcl9vZl90YXNrcxICGAFKGwoVY3Jld19udW1iZXJfb2ZfYWdlbnRzEgIY + AUrRAgoLY3Jld19hZ2VudHMSwQIKvgJbeyJrZXkiOiAiOGJkMjEzOWI1OTc1MTgxNTA2ZTQxZmQ5 + YzQ1NjNkNzUiLCAiaWQiOiAiY2UxNjA2YjktMjdiOS00ZDc4LWEyODctNDZiMDNlZDg3ZTA1Iiwg + InJvbGUiOiAiUmVzZWFyY2hlciIsICJ2ZXJib3NlPyI6IGZhbHNlLCAibWF4X2l0ZXIiOiAyMCwg + Im1heF9ycG0iOiBudWxsLCAiZnVuY3Rpb25fY2FsbGluZ19sbG0iOiAiIiwgImxsbSI6ICJncHQt + NG8tbWluaSIsICJkZWxlZ2F0aW9uX2VuYWJsZWQ/IjogZmFsc2UsICJhbGxvd19jb2RlX2V4ZWN1 + dGlvbj8iOiBmYWxzZSwgIm1heF9yZXRyeV9saW1pdCI6IDIsICJ0b29sc19uYW1lcyI6IFtdfV1K + /wEKCmNyZXdfdGFza3MS8AEK7QFbeyJrZXkiOiAiMGQ2ODVhMjE5OTRkOTQ5MDk3YmM1YTU2ZDcz + N2U2ZDEiLCAiaWQiOiAiNDdkMzRjZjktMGYxZS00Y2JkLTgzMzItNzRjZjY0YWRlOThlIiwgImFz + eW5jX2V4ZWN1dGlvbj8iOiBmYWxzZSwgImh1bWFuX2lucHV0PyI6IGZhbHNlLCAiYWdlbnRfcm9s + ZSI6ICJSZXNlYXJjaGVyIiwgImFnZW50X2tleSI6ICI4YmQyMTM5YjU5NzUxODE1MDZlNDFmZDlj + NDU2M2Q3NSIsICJ0b29sc19uYW1lcyI6IFtdfV16AhgBhQEAAQAAEo4CChAf4TXS782b0PBJ4NSB + JXwsEgjXnd13GkMzlyoMVGFzayBDcmVhdGVkMAE5mb/cHjiMFRhBGRTiHjiMFRhKLgoIY3Jld19r + ZXkSIgogOGMyNzUyZjQ5ZTViOWQyYjY4Y2IzNWNhYzhmY2M4NmRKMQoHY3Jld19pZBImCiRmZGRj + MDhlMy00NTI2LTQ3ZDYtOGU1Yy02NjRjMjIyNzhkODJKLgoIdGFza19rZXkSIgogMGQ2ODVhMjE5 + OTRkOTQ5MDk3YmM1YTU2ZDczN2U2ZDFKMQoHdGFza19pZBImCiQ0N2QzNGNmOS0wZjFlLTRjYmQt + ODMzMi03NGNmNjRhZGU5OGV6AhgBhQEAAQAAEqMHChAyBGKhzDhROB5pmAoXrikyEgj6SCwzj1dU + LyoMQ3JldyBDcmVhdGVkMAE5vkjTHziMFRhBRDbhHziMFRhKGgoOY3Jld2FpX3ZlcnNpb24SCAoG + MC44Ni4wShoKDnB5dGhvbl92ZXJzaW9uEggKBjMuMTIuN0ouCghjcmV3X2tleRIiCiBiNjczNjg2 + ZmM4MjJjMjAzYzdlODc5YzY3NTQyNDY5OUoxCgdjcmV3X2lkEiYKJGYyYWVlYTYzLTU2OWUtNDUz + NS1iZTY0LTRiZjYzZmU5NjhjN0ocCgxjcmV3X3Byb2Nlc3MSDAoKc2VxdWVudGlhbEoRCgtjcmV3 + X21lbW9yeRICEABKGgoUY3Jld19udW1iZXJfb2ZfdGFza3MSAhgBShsKFWNyZXdfbnVtYmVyX29m + X2FnZW50cxICGAFK0QIKC2NyZXdfYWdlbnRzEsECCr4CW3sia2V5IjogImI1OWNmNzdiNmU3NjU4 + NDg3MGViMWMzODgyM2Q3ZTI4IiwgImlkIjogImJiZjNkM2E4LWEwMjUtNGI0ZC1hY2Q0LTFmNzcz + NTI3MWJmMCIsICJyb2xlIjogIlJlc2VhcmNoZXIiLCAidmVyYm9zZT8iOiBmYWxzZSwgIm1heF9p + dGVyIjogMjAsICJtYXhfcnBtIjogbnVsbCwgImZ1bmN0aW9uX2NhbGxpbmdfbGxtIjogIiIsICJs + bG0iOiAiZ3B0LTRvLW1pbmkiLCAiZGVsZWdhdGlvbl9lbmFibGVkPyI6IGZhbHNlLCAiYWxsb3df + Y29kZV9leGVjdXRpb24/IjogZmFsc2UsICJtYXhfcmV0cnlfbGltaXQiOiAyLCAidG9vbHNfbmFt + ZXMiOiBbXX1dSv8BCgpjcmV3X3Rhc2tzEvABCu0BW3sia2V5IjogImE1ZTVjNThjZWExYjlkMDAz + MzJlNjg0NDFkMzI3YmRmIiwgImlkIjogIjBiOTRiMTY0LTM5NTktNGFmYS05Njg4LWJjNmEwZWMy + MWYzOCIsICJhc3luY19leGVjdXRpb24/IjogZmFsc2UsICJodW1hbl9pbnB1dD8iOiBmYWxzZSwg + ImFnZW50X3JvbGUiOiAiUmVzZWFyY2hlciIsICJhZ2VudF9rZXkiOiAiYjU5Y2Y3N2I2ZTc2NTg0 + ODcwZWIxYzM4ODIzZDdlMjgiLCAidG9vbHNfbmFtZXMiOiBbXX1degIYAYUBAAEAABKOAgoQyYfi + Ftim717svttBZY3p5hIIUxR5bBHzWWkqDFRhc2sgQ3JlYXRlZDABOV4OBiA4jBUYQbLjBiA4jBUY + Si4KCGNyZXdfa2V5EiIKIGI2NzM2ODZmYzgyMmMyMDNjN2U4NzljNjc1NDI0Njk5SjEKB2NyZXdf + aWQSJgokZjJhZWVhNjMtNTY5ZS00NTM1LWJlNjQtNGJmNjNmZTk2OGM3Si4KCHRhc2tfa2V5EiIK + IGE1ZTVjNThjZWExYjlkMDAzMzJlNjg0NDFkMzI3YmRmSjEKB3Rhc2tfaWQSJgokMGI5NGIxNjQt + Mzk1OS00YWZhLTk2ODgtYmM2YTBlYzIxZjM4egIYAYUBAAEAAA== + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '3685' + Content-Type: + - application/x-protobuf + User-Agent: + - OTel-OTLP-Exporter-Python/1.27.0 + method: POST + uri: https://telemetry.crewai.com:4319/v1/traces + response: + body: + string: "\n\0" + headers: + Content-Length: + - '2' + Content-Type: + - application/x-protobuf + Date: + - Sun, 29 Dec 2024 04:43:27 GMT + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "system", "content": "You are Researcher. You have + extensive AI research experience.\nYour personal goal is: Analyze AI topics\nTo + give my best complete final answer to the task use the exact following format:\n\nThought: + I now can give a great answer\nFinal Answer: Your final answer must be the great + and the most complete as possible, it must be outcome described.\n\nI MUST use + these formats, my job depends on it!"}, {"role": "user", "content": "\nCurrent + Task: Explain the advantages of AI.\n\nThis is the expect criteria for your + final answer: A summary of the main advantages, bullet points recommended.\nyou + MUST return the actual complete content as the final answer, not a summary.\n\nBegin! + This is VERY important to you, use the tools available and give your best Final + Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4o-mini", "stop": + ["\nObservation:"], "stream": false}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + connection: + - keep-alive + content-length: + - '922' + content-type: + - application/json + cookie: + - _cfuvid=eff7OIkJ0zWRunpA6z67LHqscmSe6XjNxXiPw1R3xCc-1733770413538-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.52.1 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.52.1 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + content: "{\n \"id\": \"chatcmpl-AjfR6FDuTw7NGzy8w7sxjvOkUQlru\",\n \"object\": + \"chat.completion\",\n \"created\": 1735447404,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"I now can give a great answer \\nFinal + Answer: \\n**Advantages of AI** \\n\\n1. **Increased Efficiency and Productivity** + \ \\n - AI systems can process large amounts of data quickly and accurately, + leading to faster decision-making and increased productivity in various sectors.\\n\\n2. + **Cost Savings** \\n - Automation of repetitive and time-consuming tasks + reduces labor costs and increases operational efficiency, allowing businesses + to allocate resources more effectively.\\n\\n3. **Enhanced Data Analysis** \\n + \ - AI excels at analyzing big data, identifying patterns, and providing insights + that support better strategic planning and business decision-making.\\n\\n4. + **24/7 Availability** \\n - AI solutions, such as chatbots and virtual assistants, + operate continuously without breaks, offering constant support and customer + service, enhancing user experience.\\n\\n5. **Personalization** \\n - AI + enables the customization of content, products, and services based on user preferences + and behaviors, leading to improved customer satisfaction and loyalty.\\n\\n6. + **Improved Accuracy** \\n - AI technologies, such as machine learning algorithms, + reduce the likelihood of human error in various processes, leading to greater + accuracy and reliability.\\n\\n7. **Enhanced Innovation** \\n - AI fosters + innovative solutions by providing new tools and approaches to problem-solving, + enabling companies to develop cutting-edge products and services.\\n\\n8. **Scalability** + \ \\n - AI can be scaled to handle varying amounts of workloads without significant + changes to infrastructure, making it easier for organizations to expand operations.\\n\\n9. + **Predictive Capabilities** \\n - Advanced analytics powered by AI can anticipate + trends and outcomes, allowing businesses to proactively adjust strategies and + improve forecasting.\\n\\n10. **Health Benefits** \\n - In healthcare, AI + assists in diagnostics, personalized treatment plans, and predictive analytics, + leading to better patient care and improved health outcomes.\\n\\n11. **Safety + and Risk Mitigation** \\n - AI can enhance safety in various industries + by taking over dangerous tasks, monitoring for hazards, and predicting maintenance + needs for critical machinery, thereby preventing accidents.\\n\\n12. **Reduced + Environmental Impact** \\n - AI can optimize resource usage in areas such + as energy consumption and supply chain logistics, contributing to sustainability + efforts and reducing overall environmental footprints.\",\n \"refusal\": + null\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n + \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 168,\n \"completion_tokens\": + 440,\n \"total_tokens\": 608,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n + \ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"system_fingerprint\": + \"fp_0aa8d3e20b\"\n}\n" + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8f9721053d1eb9f1-SEA + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Sun, 29 Dec 2024 04:43:32 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=5enubNIoQSGMYEgy8Q2FpzzhphA0y.0lXukRZrWFvMk-1735447412-1.0.1.1-FIK1sMkUl3YnW1gTC6ftDtb2mKsbosb4mwabdFAlWCfJ6pXeavYq.bPsfKNvzAb5WYq60yVGH5lHsJT05bhSgw; + path=/; expires=Sun, 29-Dec-24 05:13:32 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=63wmKMTuFamkLN8FBI4fP8JZWbjWiRxWm7wb3kz.z_A-1735447412038-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '7577' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999793' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_55b8d714656e8f10f4e23cbe9034d66b + http_version: HTTP/1.1 + status_code: 200 +version: 1 diff --git a/tests/crew_test.py b/tests/crew_test.py index 2003ddada..74bcf08d3 100644 --- a/tests/crew_test.py +++ b/tests/crew_test.py @@ -1941,6 +1941,90 @@ def test_crew_log_file_output(tmp_path): assert test_file.exists() +@pytest.mark.vcr(filter_headers=["authorization"]) +def test_crew_output_file_end_to_end(tmp_path): + """Test output file functionality in a full crew context.""" + # Create an agent + agent = Agent( + role="Researcher", + goal="Analyze AI topics", + backstory="You have extensive AI research experience.", + allow_delegation=False, + ) + + # Create a task with dynamic output file path + dynamic_path = tmp_path / "output_{topic}.txt" + task = Task( + description="Explain the advantages of {topic}.", + expected_output="A summary of the main advantages, bullet points recommended.", + agent=agent, + output_file=str(dynamic_path), + ) + + # Create and run the crew + crew = Crew( + agents=[agent], + tasks=[task], + process=Process.sequential, + ) + crew.kickoff(inputs={"topic": "AI"}) + + # Verify file creation and cleanup + expected_file = tmp_path / "output_AI.txt" + assert expected_file.exists(), f"Output file {expected_file} was not created" + + +@pytest.mark.vcr(filter_headers=["authorization"]) +def test_crew_output_file_validation_failures(): + """Test output file validation failures in a crew context.""" + agent = Agent( + role="Researcher", + goal="Analyze data", + backstory="You analyze data files.", + allow_delegation=False, + ) + + # Test path traversal + with pytest.raises(ValueError, match="Path traversal"): + task = Task( + description="Analyze data", + expected_output="Analysis results", + agent=agent, + output_file="../output.txt" + ) + Crew(agents=[agent], tasks=[task]).kickoff() + + # Test shell special characters + with pytest.raises(ValueError, match="Shell special characters"): + task = Task( + description="Analyze data", + expected_output="Analysis results", + agent=agent, + output_file="output.txt | rm -rf /" + ) + Crew(agents=[agent], tasks=[task]).kickoff() + + # Test shell expansion + with pytest.raises(ValueError, match="Shell expansion"): + task = Task( + description="Analyze data", + expected_output="Analysis results", + agent=agent, + output_file="~/output.txt" + ) + Crew(agents=[agent], tasks=[task]).kickoff() + + # Test invalid template variable + with pytest.raises(ValueError, match="Invalid template variable"): + task = Task( + description="Analyze data", + expected_output="Analysis results", + agent=agent, + output_file="{invalid-name}/output.txt" + ) + Crew(agents=[agent], tasks=[task]).kickoff() + + @pytest.mark.vcr(filter_headers=["authorization"]) def test_manager_agent(): from unittest.mock import patch @@ -3125,4 +3209,4 @@ def test_multimodal_agent_live_image_analysis(): # Verify we got a meaningful response assert isinstance(result.raw, str) assert len(result.raw) > 100 # Expecting a detailed analysis - assert "error" not in result.raw.lower() # No error messages in response \ No newline at end of file + assert "error" not in result.raw.lower() # No error messages in response diff --git a/tests/task_test.py b/tests/task_test.py index 40eb98e54..dc15c251f 100644 --- a/tests/task_test.py +++ b/tests/task_test.py @@ -719,21 +719,24 @@ def test_interpolate_inputs(): task = Task( description="Give me a list of 5 interesting ideas about {topic} to explore for an article, what makes them unique and interesting.", expected_output="Bullet point list of 5 interesting ideas about {topic}.", + output_file="/tmp/{topic}/output_{date}.txt" ) - task.interpolate_inputs(inputs={"topic": "AI"}) + task.interpolate_inputs(inputs={"topic": "AI", "date": "2024"}) assert ( task.description == "Give me a list of 5 interesting ideas about AI to explore for an article, what makes them unique and interesting." ) assert task.expected_output == "Bullet point list of 5 interesting ideas about AI." + assert task.output_file == "/tmp/AI/output_2024.txt" - task.interpolate_inputs(inputs={"topic": "ML"}) + task.interpolate_inputs(inputs={"topic": "ML", "date": "2025"}) assert ( task.description == "Give me a list of 5 interesting ideas about ML to explore for an article, what makes them unique and interesting." ) assert task.expected_output == "Bullet point list of 5 interesting ideas about ML." + assert task.output_file == "/tmp/ML/output_2025.txt" def test_interpolate_only(): @@ -872,3 +875,61 @@ def test_key(): assert ( task.key == hash ), "The key should be the hash of the non-interpolated description." + + +def test_output_file_validation(): + """Test output file path validation.""" + # Valid paths + assert Task( + description="Test task", + expected_output="Test output", + output_file="output.txt" + ).output_file == "output.txt" + assert Task( + description="Test task", + expected_output="Test output", + output_file="/tmp/output.txt" + ).output_file == "tmp/output.txt" + assert Task( + description="Test task", + expected_output="Test output", + output_file="{dir}/output_{date}.txt" + ).output_file == "{dir}/output_{date}.txt" + + # Invalid paths + with pytest.raises(ValueError, match="Path traversal"): + Task( + description="Test task", + expected_output="Test output", + output_file="../output.txt" + ) + with pytest.raises(ValueError, match="Path traversal"): + Task( + description="Test task", + expected_output="Test output", + output_file="folder/../output.txt" + ) + with pytest.raises(ValueError, match="Shell special characters"): + Task( + description="Test task", + expected_output="Test output", + output_file="output.txt | rm -rf /" + ) + with pytest.raises(ValueError, match="Shell expansion"): + Task( + description="Test task", + expected_output="Test output", + output_file="~/output.txt" + ) + with pytest.raises(ValueError, match="Shell expansion"): + Task( + description="Test task", + expected_output="Test output", + output_file="$HOME/output.txt" + ) + with pytest.raises(ValueError, match="Invalid template variable"): + Task( + description="Test task", + expected_output="Test output", + output_file="{invalid-name}/output.txt" + ) From d85898cf29ca6f67fbf3d14e06bbd075309b308a Mon Sep 17 00:00:00 2001 From: "devin-ai-integration[bot]" <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 30 Dec 2024 16:58:18 -0300 Subject: [PATCH 08/11] fix(manager_llm): handle coworker role name case/whitespace properly (#1820) * fix(manager_llm): handle coworker role name case/whitespace properly - Add .strip() to agent name and role comparisons in base_agent_tools.py - Add test case for varied role name cases and whitespace - Fix issue #1503 with manager LLM delegation Co-Authored-By: Joe Moura * fix(manager_llm): improve error handling and add debug logging - Add debug logging for better observability - Add sanitize_agent_name helper method - Enhance error messages with more context - Add parameterized tests for edge cases: - Embedded quotes - Trailing newlines - Multiple whitespace - Case variations - None values - Improve error handling with specific exceptions Co-Authored-By: Joe Moura * style: fix import sorting in base_agent_tools and test_manager_llm_delegation Co-Authored-By: Joe Moura * fix(manager_llm): improve whitespace normalization in role name matching Co-Authored-By: Joe Moura * style: fix import sorting in base_agent_tools and test_manager_llm_delegation Co-Authored-By: Joe Moura * fix(manager_llm): add error message template for agent tool execution errors Co-Authored-By: Joe Moura * style: fix import sorting in test_manager_llm_delegation.py Co-Authored-By: Joe Moura --------- Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: Joe Moura --- .../tools/agent_tools/base_agent_tools.py | 86 +++++++++++++++---- src/crewai/translations/en.json | 3 +- tests/crew_test.py | 65 ++++++++++++++ tests/test_manager_llm_delegation.py | 55 ++++++++++++ 4 files changed, 193 insertions(+), 16 deletions(-) create mode 100644 tests/test_manager_llm_delegation.py diff --git a/src/crewai/tools/agent_tools/base_agent_tools.py b/src/crewai/tools/agent_tools/base_agent_tools.py index ea63dd51e..e67dce72a 100644 --- a/src/crewai/tools/agent_tools/base_agent_tools.py +++ b/src/crewai/tools/agent_tools/base_agent_tools.py @@ -1,3 +1,4 @@ +import logging from typing import Optional, Union from pydantic import Field @@ -7,6 +8,8 @@ from crewai.task import Task from crewai.tools.base_tool import BaseTool from crewai.utilities import I18N +logger = logging.getLogger(__name__) + class BaseAgentTool(BaseTool): """Base class for agent-related tools""" @@ -16,6 +19,25 @@ class BaseAgentTool(BaseTool): default_factory=I18N, description="Internationalization settings" ) + def sanitize_agent_name(self, name: str) -> str: + """ + Sanitize agent role name by normalizing whitespace and setting to lowercase. + Converts all whitespace (including newlines) to single spaces and removes quotes. + + Args: + name (str): The agent role name to sanitize + + Returns: + str: The sanitized agent role name, with whitespace normalized, + converted to lowercase, and quotes removed + """ + if not name: + return "" + # Normalize all whitespace (including newlines) to single spaces + normalized = " ".join(name.split()) + # Remove quotes and convert to lowercase + return normalized.replace('"', "").casefold() + def _get_coworker(self, coworker: Optional[str], **kwargs) -> Optional[str]: coworker = coworker or kwargs.get("co_worker") or kwargs.get("coworker") if coworker: @@ -25,11 +47,27 @@ class BaseAgentTool(BaseTool): return coworker def _execute( - self, agent_name: Union[str, None], task: str, context: Union[str, None] + self, + agent_name: Optional[str], + task: str, + context: Optional[str] = None ) -> str: + """ + Execute delegation to an agent with case-insensitive and whitespace-tolerant matching. + + Args: + agent_name: Name/role of the agent to delegate to (case-insensitive) + task: The specific question or task to delegate + context: Optional additional context for the task execution + + Returns: + str: The execution result from the delegated agent or an error message + if the agent cannot be found + """ try: if agent_name is None: agent_name = "" + logger.debug("No agent name provided, using empty string") # It is important to remove the quotes from the agent name. # The reason we have to do this is because less-powerful LLM's @@ -38,31 +76,49 @@ class BaseAgentTool(BaseTool): # {"task": "....", "coworker": ".... # when it should look like this: # {"task": "....", "coworker": "...."} - agent_name = agent_name.casefold().replace('"', "").replace("\n", "") + sanitized_name = self.sanitize_agent_name(agent_name) + logger.debug(f"Sanitized agent name from '{agent_name}' to '{sanitized_name}'") + + available_agents = [agent.role for agent in self.agents] + logger.debug(f"Available agents: {available_agents}") + agent = [ # type: ignore # Incompatible types in assignment (expression has type "list[BaseAgent]", variable has type "str | None") available_agent for available_agent in self.agents - if available_agent.role.casefold().replace("\n", "") == agent_name + if self.sanitize_agent_name(available_agent.role) == sanitized_name ] - except Exception as _: + logger.debug(f"Found {len(agent)} matching agents for role '{sanitized_name}'") + except (AttributeError, ValueError) as e: + # Handle specific exceptions that might occur during role name processing return self.i18n.errors("agent_tool_unexisting_coworker").format( coworkers="\n".join( - [f"- {agent.role.casefold()}" for agent in self.agents] - ) + [f"- {self.sanitize_agent_name(agent.role)}" for agent in self.agents] + ), + error=str(e) ) if not agent: + # No matching agent found after sanitization return self.i18n.errors("agent_tool_unexisting_coworker").format( coworkers="\n".join( - [f"- {agent.role.casefold()}" for agent in self.agents] - ) + [f"- {self.sanitize_agent_name(agent.role)}" for agent in self.agents] + ), + error=f"No agent found with role '{sanitized_name}'" ) agent = agent[0] - task_with_assigned_agent = Task( # type: ignore # Incompatible types in assignment (expression has type "Task", variable has type "str") - description=task, - agent=agent, - expected_output=agent.i18n.slice("manager_request"), - i18n=agent.i18n, - ) - return agent.execute_task(task_with_assigned_agent, context) + try: + task_with_assigned_agent = Task( + description=task, + agent=agent, + expected_output=agent.i18n.slice("manager_request"), + i18n=agent.i18n, + ) + logger.debug(f"Created task for agent '{self.sanitize_agent_name(agent.role)}': {task}") + return agent.execute_task(task_with_assigned_agent, context) + except Exception as e: + # Handle task creation or execution errors + return self.i18n.errors("agent_tool_execution_error").format( + agent_role=self.sanitize_agent_name(agent.role), + error=str(e) + ) diff --git a/src/crewai/translations/en.json b/src/crewai/translations/en.json index 12850c9e2..a1ea30436 100644 --- a/src/crewai/translations/en.json +++ b/src/crewai/translations/en.json @@ -33,7 +33,8 @@ "tool_usage_error": "I encountered an error: {error}", "tool_arguments_error": "Error: the Action Input is not a valid key, value dictionary.", "wrong_tool_name": "You tried to use the tool {tool}, but it doesn't exist. You must use one of the following tools, use one at time: {tools}.", - "tool_usage_exception": "I encountered an error while trying to use the tool. This was the error: {error}.\n Tool {tool} accepts these inputs: {tool_inputs}" + "tool_usage_exception": "I encountered an error while trying to use the tool. This was the error: {error}.\n Tool {tool} accepts these inputs: {tool_inputs}", + "agent_tool_execution_error": "Error executing task with agent '{agent_role}'. Error: {error}" }, "tools": { "delegate_work": "Delegate a specific task to one of the following coworkers: {coworkers}\nThe input to this tool should be the coworker, the task you want them to do, and ALL necessary context to execute the task, they know nothing about the task, so share absolute everything you know, don't reference things but instead explain them.", diff --git a/tests/crew_test.py b/tests/crew_test.py index 74bcf08d3..0cb8f469c 100644 --- a/tests/crew_test.py +++ b/tests/crew_test.py @@ -391,6 +391,71 @@ def test_manager_agent_delegating_to_all_agents(): ) +@pytest.mark.vcr(filter_headers=["authorization"]) +def test_manager_agent_delegates_with_varied_role_cases(): + """ + Test that the manager agent can delegate to agents regardless of case or whitespace variations in role names. + This test verifies the fix for issue #1503 where role matching was too strict. + """ + # Create agents with varied case and whitespace in roles + researcher_spaced = Agent( + role=" Researcher ", # Extra spaces + goal="Research with spaces in role", + backstory="A researcher with spaces in role name", + allow_delegation=False, + ) + + writer_caps = Agent( + role="SENIOR WRITER", # All caps + goal="Write with caps in role", + backstory="A writer with caps in role name", + allow_delegation=False, + ) + + task = Task( + description="Research and write about AI. The researcher should do the research, and the writer should write it up.", + expected_output="A well-researched article about AI.", + agent=researcher_spaced, # Assign to researcher with spaces + ) + + crew = Crew( + agents=[researcher_spaced, writer_caps], + process=Process.hierarchical, + manager_llm="gpt-4o", + tasks=[task], + ) + + mock_task_output = TaskOutput( + description="Mock description", + raw="mocked output", + agent="mocked agent" + ) + task.output = mock_task_output + + with patch.object(Task, 'execute_sync', return_value=mock_task_output) as mock_execute_sync: + crew.kickoff() + + # Verify execute_sync was called once + mock_execute_sync.assert_called_once() + + # Get the tools argument from the call + _, kwargs = mock_execute_sync.call_args + tools = kwargs['tools'] + + # Verify the delegation tools were passed correctly and can handle case/whitespace variations + assert len(tools) == 2 + + # Check delegation tool descriptions (should work despite case/whitespace differences) + delegation_tool = tools[0] + question_tool = tools[1] + + assert "Delegate a specific task to one of the following coworkers:" in delegation_tool.description + assert " Researcher " in delegation_tool.description or "SENIOR WRITER" in delegation_tool.description + + assert "Ask a specific question to one of the following coworkers:" in question_tool.description + assert " Researcher " in question_tool.description or "SENIOR WRITER" in question_tool.description + + @pytest.mark.vcr(filter_headers=["authorization"]) def test_crew_with_delegating_agents(): tasks = [ diff --git a/tests/test_manager_llm_delegation.py b/tests/test_manager_llm_delegation.py new file mode 100644 index 000000000..d1f2068e4 --- /dev/null +++ b/tests/test_manager_llm_delegation.py @@ -0,0 +1,55 @@ +from unittest.mock import MagicMock + +import pytest + +from crewai import Agent, Task +from crewai.tools.agent_tools.base_agent_tools import BaseAgentTool + + +class TestAgentTool(BaseAgentTool): + """Concrete implementation of BaseAgentTool for testing.""" + def _run(self, *args, **kwargs): + """Implement required _run method.""" + return "Test response" + +@pytest.mark.parametrize("role_name,should_match", [ + ('Futel Official Infopoint', True), # exact match + (' "Futel Official Infopoint" ', True), # extra quotes and spaces + ('Futel Official Infopoint\n', True), # trailing newline + ('"Futel Official Infopoint"', True), # embedded quotes + (' FUTEL\nOFFICIAL INFOPOINT ', True), # multiple whitespace and newline + ('futel official infopoint', True), # lowercase + ('FUTEL OFFICIAL INFOPOINT', True), # uppercase + ('Non Existent Agent', False), # non-existent agent + (None, False), # None agent name +]) +def test_agent_tool_role_matching(role_name, should_match): + """Test that agent tools can match roles regardless of case, whitespace, and special characters.""" + # Create test agent + test_agent = Agent( + role='Futel Official Infopoint', + goal='Answer questions about Futel', + backstory='Futel Football Club info', + allow_delegation=False + ) + + # Create test agent tool + agent_tool = TestAgentTool( + name="test_tool", + description="Test tool", + agents=[test_agent] + ) + + # Test role matching + result = agent_tool._execute( + agent_name=role_name, + task='Test task', + context=None + ) + + if should_match: + assert "coworker mentioned not found" not in result.lower(), \ + f"Should find agent with role name: {role_name}" + else: + assert "coworker mentioned not found" in result.lower(), \ + f"Should not find agent with role name: {role_name}" From ba0965ef874eee958b8cb4cc091f23416ba3c9fb Mon Sep 17 00:00:00 2001 From: "devin-ai-integration[bot]" <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Mon, 30 Dec 2024 17:10:56 -0300 Subject: [PATCH 09/11] fix: add tiktoken as explicit dependency and document Rust requirement (#1826) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * feat: add tiktoken as explicit dependency and document Rust requirement - Add tiktoken>=0.8.0 as explicit dependency to ensure pre-built wheels are used - Document Rust compiler requirement as fallback in README.md - Addresses issue #1824 tiktoken build failure Co-Authored-By: Joe Moura * fix: adjust tiktoken version to ~=0.7.0 for dependency compatibility - Update tiktoken dependency to ~=0.7.0 to resolve conflict with embedchain - Maintain compatibility with crewai-tools dependency chain - Addresses CI build failures Co-Authored-By: Joe Moura * docs: add troubleshooting section and make tiktoken optional Co-Authored-By: Joe Moura * Update README.md --------- Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: Joe Moura Co-authored-by: João Moura --- README.md | 17 ++++++++++++++++- pyproject.toml | 32 +++++++++++++++++++++++--------- 2 files changed, 39 insertions(+), 10 deletions(-) diff --git a/README.md b/README.md index bf1287d4d..edcbb6f51 100644 --- a/README.md +++ b/README.md @@ -85,7 +85,6 @@ First, install CrewAI: ```shell pip install crewai ``` - If you want to install the 'crewai' package along with its optional features that include additional tools for agents, you can do so by using the following command: ```shell @@ -93,6 +92,22 @@ pip install 'crewai[tools]' ``` The command above installs the basic package and also adds extra components which require more dependencies to function. +### Troubleshooting Dependencies + +If you encounter issues during installation or usage, here are some common solutions: + +#### Common Issues + +1. **ModuleNotFoundError: No module named 'tiktoken'** + - Install tiktoken explicitly: `pip install 'crewai[embeddings]'` + - If using embedchain or other tools: `pip install 'crewai[tools]'` + +2. **Failed building wheel for tiktoken** + - Ensure Rust compiler is installed (see installation steps above) + - For Windows: Verify Visual C++ Build Tools are installed + - Try upgrading pip: `pip install --upgrade pip` + - If issues persist, use a pre-built wheel: `pip install tiktoken --prefer-binary` + ### 2. Setting Up Your Crew with the YAML Configuration To create a new CrewAI project, run the following CLI (Command Line Interface) command: diff --git a/pyproject.toml b/pyproject.toml index 3f10c1a87..bcc00a0d9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,27 +8,38 @@ authors = [ { name = "Joao Moura", email = "joao@crewai.com" } ] dependencies = [ + # Core Dependencies "pydantic>=2.4.2", "openai>=1.13.3", + "litellm>=1.44.22", + "instructor>=1.3.3", + + # Text Processing + "pdfplumber>=0.11.4", + "regex>=2024.9.11", + + # Telemetry and Monitoring "opentelemetry-api>=1.22.0", "opentelemetry-sdk>=1.22.0", "opentelemetry-exporter-otlp-proto-http>=1.22.0", - "instructor>=1.3.3", - "regex>=2024.9.11", - "click>=8.1.7", + + # Data Handling + "chromadb>=0.5.23", + "openpyxl>=3.1.5", + "pyvis>=0.3.2", + + # Authentication and Security + "auth0-python>=4.7.1", "python-dotenv>=1.0.0", + + # Configuration and Utils + "click>=8.1.7", "appdirs>=1.4.4", "jsonref>=1.1.0", "json-repair>=0.25.2", - "auth0-python>=4.7.1", - "litellm>=1.44.22", - "pyvis>=0.3.2", "uv>=0.4.25", "tomli-w>=1.1.0", "tomli>=2.0.2", - "chromadb>=0.5.23", - "pdfplumber>=0.11.4", - "openpyxl>=3.1.5", "blinker>=1.9.0", ] @@ -39,6 +50,9 @@ Repository = "https://github.com/crewAIInc/crewAI" [project.optional-dependencies] tools = ["crewai-tools>=0.17.0"] +embeddings = [ + "tiktoken~=0.7.0" +] agentops = ["agentops>=0.3.0"] fastembed = ["fastembed>=0.4.1"] pdfplumber = [ From 45b802a6252334402947451d26f4fdc6b5c72629 Mon Sep 17 00:00:00 2001 From: "devin-ai-integration[bot]" <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 31 Dec 2024 01:39:19 -0300 Subject: [PATCH 10/11] Docstring, Error Handling, and Type Hints Improvements (#1828) * docs: add comprehensive docstrings to Flow class and methods - Added NumPy-style docstrings to all decorator functions - Added detailed documentation to Flow class methods - Included parameter types, return types, and examples - Enhanced documentation clarity and completeness Co-Authored-By: Joe Moura * feat: add secure path handling utilities - Add path_utils.py with safe path handling functions - Implement path validation and security checks - Integrate secure path handling in flow_visualizer.py - Add path validation in html_template_handler.py - Add comprehensive error handling for path operations Co-Authored-By: Joe Moura * docs: add comprehensive docstrings and type hints to flow utils (#1819) Co-Authored-By: Joe Moura * fix: add type annotations and fix import sorting Co-Authored-By: Joe Moura * fix: add type annotations to flow utils and visualization utils Co-Authored-By: Joe Moura * fix: resolve import sorting and type annotation issues Co-Authored-By: Joe Moura * fix: properly initialize and update edge_smooth variable Co-Authored-By: Joe Moura --------- Co-authored-by: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Co-authored-by: Joe Moura --- src/crewai/flow/flow.py | 282 ++++++++++++++++++++++- src/crewai/flow/flow_visualizer.py | 232 ++++++++++++++----- src/crewai/flow/html_template_handler.py | 25 +- src/crewai/flow/path_utils.py | 135 +++++++++++ src/crewai/flow/utils.py | 172 ++++++++++++-- src/crewai/flow/visualization_utils.py | 130 ++++++++++- 6 files changed, 889 insertions(+), 87 deletions(-) create mode 100644 src/crewai/flow/path_utils.py diff --git a/src/crewai/flow/flow.py b/src/crewai/flow/flow.py index 4a6361cce..806d9ec84 100644 --- a/src/crewai/flow/flow.py +++ b/src/crewai/flow/flow.py @@ -30,7 +30,47 @@ from crewai.telemetry import Telemetry T = TypeVar("T", bound=Union[BaseModel, Dict[str, Any]]) -def start(condition=None): +def start(condition: Optional[Union[str, dict, Callable]] = None) -> Callable: + """ + Marks a method as a flow's starting point. + + This decorator designates a method as an entry point for the flow execution. + It can optionally specify conditions that trigger the start based on other + method executions. + + Parameters + ---------- + condition : Optional[Union[str, dict, Callable]], optional + Defines when the start method should execute. Can be: + - str: Name of a method that triggers this start + - dict: Contains "type" ("AND"/"OR") and "methods" (list of triggers) + - Callable: A method reference that triggers this start + Default is None, meaning unconditional start. + + Returns + ------- + Callable + A decorator function that marks the method as a flow start point. + + Raises + ------ + ValueError + If the condition format is invalid. + + Examples + -------- + >>> @start() # Unconditional start + >>> def begin_flow(self): + ... pass + + >>> @start("method_name") # Start after specific method + >>> def conditional_start(self): + ... pass + + >>> @start(and_("method1", "method2")) # Start after multiple methods + >>> def complex_start(self): + ... pass + """ def decorator(func): func.__is_start_method__ = True if condition is not None: @@ -56,7 +96,42 @@ def start(condition=None): return decorator -def listen(condition): +def listen(condition: Union[str, dict, Callable]) -> Callable: + """ + Creates a listener that executes when specified conditions are met. + + This decorator sets up a method to execute in response to other method + executions in the flow. It supports both simple and complex triggering + conditions. + + Parameters + ---------- + condition : Union[str, dict, Callable] + Specifies when the listener should execute. Can be: + - str: Name of a method that triggers this listener + - dict: Contains "type" ("AND"/"OR") and "methods" (list of triggers) + - Callable: A method reference that triggers this listener + + Returns + ------- + Callable + A decorator function that sets up the method as a listener. + + Raises + ------ + ValueError + If the condition format is invalid. + + Examples + -------- + >>> @listen("process_data") # Listen to single method + >>> def handle_processed_data(self): + ... pass + + >>> @listen(or_("success", "failure")) # Listen to multiple methods + >>> def handle_completion(self): + ... pass + """ def decorator(func): if isinstance(condition, str): func.__trigger_methods__ = [condition] @@ -80,7 +155,47 @@ def listen(condition): return decorator -def router(condition): +def router(condition: Union[str, dict, Callable]) -> Callable: + """ + Creates a routing method that directs flow execution based on conditions. + + This decorator marks a method as a router, which can dynamically determine + the next steps in the flow based on its return value. Routers are triggered + by specified conditions and can return constants that determine which path + the flow should take. + + Parameters + ---------- + condition : Union[str, dict, Callable] + Specifies when the router should execute. Can be: + - str: Name of a method that triggers this router + - dict: Contains "type" ("AND"/"OR") and "methods" (list of triggers) + - Callable: A method reference that triggers this router + + Returns + ------- + Callable + A decorator function that sets up the method as a router. + + Raises + ------ + ValueError + If the condition format is invalid. + + Examples + -------- + >>> @router("check_status") + >>> def route_based_on_status(self): + ... if self.state.status == "success": + ... return SUCCESS + ... return FAILURE + + >>> @router(and_("validate", "process")) + >>> def complex_routing(self): + ... if all([self.state.valid, self.state.processed]): + ... return CONTINUE + ... return STOP + """ def decorator(func): func.__is_router__ = True # Handle conditions like listen/start @@ -106,7 +221,39 @@ def router(condition): return decorator -def or_(*conditions): +def or_(*conditions: Union[str, dict, Callable]) -> dict: + """ + Combines multiple conditions with OR logic for flow control. + + Creates a condition that is satisfied when any of the specified conditions + are met. This is used with @start, @listen, or @router decorators to create + complex triggering conditions. + + Parameters + ---------- + *conditions : Union[str, dict, Callable] + Variable number of conditions that can be: + - str: Method names + - dict: Existing condition dictionaries + - Callable: Method references + + Returns + ------- + dict + A condition dictionary with format: + {"type": "OR", "methods": list_of_method_names} + + Raises + ------ + ValueError + If any condition is invalid. + + Examples + -------- + >>> @listen(or_("success", "timeout")) + >>> def handle_completion(self): + ... pass + """ methods = [] for condition in conditions: if isinstance(condition, dict) and "methods" in condition: @@ -120,7 +267,39 @@ def or_(*conditions): return {"type": "OR", "methods": methods} -def and_(*conditions): +def and_(*conditions: Union[str, dict, Callable]) -> dict: + """ + Combines multiple conditions with AND logic for flow control. + + Creates a condition that is satisfied only when all specified conditions + are met. This is used with @start, @listen, or @router decorators to create + complex triggering conditions. + + Parameters + ---------- + *conditions : Union[str, dict, Callable] + Variable number of conditions that can be: + - str: Method names + - dict: Existing condition dictionaries + - Callable: Method references + + Returns + ------- + dict + A condition dictionary with format: + {"type": "AND", "methods": list_of_method_names} + + Raises + ------ + ValueError + If any condition is invalid. + + Examples + -------- + >>> @listen(and_("validated", "processed")) + >>> def handle_complete_data(self): + ... pass + """ methods = [] for condition in conditions: if isinstance(condition, dict) and "methods" in condition: @@ -286,6 +465,23 @@ class Flow(Generic[T], metaclass=FlowMeta): return final_output async def _execute_start_method(self, start_method_name: str) -> None: + """ + Executes a flow's start method and its triggered listeners. + + This internal method handles the execution of methods marked with @start + decorator and manages the subsequent chain of listener executions. + + Parameters + ---------- + start_method_name : str + The name of the start method to execute. + + Notes + ----- + - Executes the start method and captures its result + - Triggers execution of any listeners waiting on this start method + - Part of the flow's initialization sequence + """ result = await self._execute_method( start_method_name, self._methods[start_method_name] ) @@ -306,6 +502,28 @@ class Flow(Generic[T], metaclass=FlowMeta): return result async def _execute_listeners(self, trigger_method: str, result: Any) -> None: + """ + Executes all listeners and routers triggered by a method completion. + + This internal method manages the execution flow by: + 1. First executing all triggered routers sequentially + 2. Then executing all triggered listeners in parallel + + Parameters + ---------- + trigger_method : str + The name of the method that triggered these listeners. + result : Any + The result from the triggering method, passed to listeners + that accept parameters. + + Notes + ----- + - Routers are executed sequentially to maintain flow control + - Each router's result becomes the new trigger_method + - Normal listeners are executed in parallel for efficiency + - Listeners can receive the trigger method's result as a parameter + """ # First, handle routers repeatedly until no router triggers anymore while True: routers_triggered = self._find_triggered_methods( @@ -335,6 +553,33 @@ class Flow(Generic[T], metaclass=FlowMeta): def _find_triggered_methods( self, trigger_method: str, router_only: bool ) -> List[str]: + """ + Finds all methods that should be triggered based on conditions. + + This internal method evaluates both OR and AND conditions to determine + which methods should be executed next in the flow. + + Parameters + ---------- + trigger_method : str + The name of the method that just completed execution. + router_only : bool + If True, only consider router methods. + If False, only consider non-router methods. + + Returns + ------- + List[str] + Names of methods that should be triggered. + + Notes + ----- + - Handles both OR and AND conditions: + * OR: Triggers if any condition is met + * AND: Triggers only when all conditions are met + - Maintains state for AND conditions using _pending_and_listeners + - Separates router and normal listener evaluation + """ triggered = [] for listener_name, (condition_type, methods) in self._listeners.items(): is_router = listener_name in self._routers @@ -363,6 +608,33 @@ class Flow(Generic[T], metaclass=FlowMeta): return triggered async def _execute_single_listener(self, listener_name: str, result: Any) -> None: + """ + Executes a single listener method with proper event handling. + + This internal method manages the execution of an individual listener, + including parameter inspection, event emission, and error handling. + + Parameters + ---------- + listener_name : str + The name of the listener method to execute. + result : Any + The result from the triggering method, which may be passed + to the listener if it accepts parameters. + + Notes + ----- + - Inspects method signature to determine if it accepts the trigger result + - Emits events for method execution start and finish + - Handles errors gracefully with detailed logging + - Recursively triggers listeners of this listener + - Supports both parameterized and parameter-less listeners + + Error Handling + ------------- + Catches and logs any exceptions during execution, preventing + individual listener failures from breaking the entire flow. + """ try: method = self._methods[listener_name] diff --git a/src/crewai/flow/flow_visualizer.py b/src/crewai/flow/flow_visualizer.py index 988f27919..ceacee91f 100644 --- a/src/crewai/flow/flow_visualizer.py +++ b/src/crewai/flow/flow_visualizer.py @@ -1,12 +1,14 @@ # flow_visualizer.py import os +from pathlib import Path from pyvis.network import Network from crewai.flow.config import COLORS, NODE_STYLES from crewai.flow.html_template_handler import HTMLTemplateHandler from crewai.flow.legend_generator import generate_legend_items_html, get_legend_items +from crewai.flow.path_utils import safe_path_join, validate_path_exists from crewai.flow.utils import calculate_node_levels from crewai.flow.visualization_utils import ( add_edges, @@ -17,88 +19,206 @@ from crewai.flow.visualization_utils import ( class FlowPlot: def __init__(self, flow): + """ + Initialize FlowPlot with a flow object. + + Parameters + ---------- + flow : Flow + A Flow instance to visualize. + + Raises + ------ + ValueError + If flow object is invalid or missing required attributes. + """ + if not hasattr(flow, '_methods'): + raise ValueError("Invalid flow object: missing '_methods' attribute") + if not hasattr(flow, '_listeners'): + raise ValueError("Invalid flow object: missing '_listeners' attribute") + if not hasattr(flow, '_start_methods'): + raise ValueError("Invalid flow object: missing '_start_methods' attribute") + self.flow = flow self.colors = COLORS self.node_styles = NODE_STYLES def plot(self, filename): - net = Network( - directed=True, - height="750px", - width="100%", - bgcolor=self.colors["bg"], - layout=None, - ) - - # Set options to disable physics - net.set_options( - """ - var options = { - "nodes": { - "font": { - "multi": "html" - } - }, - "physics": { - "enabled": false - } - } """ - ) + Generate and save an HTML visualization of the flow. - # Calculate levels for nodes - node_levels = calculate_node_levels(self.flow) + Parameters + ---------- + filename : str + Name of the output file (without extension). - # Compute positions - node_positions = compute_positions(self.flow, node_levels) + Raises + ------ + ValueError + If filename is invalid or network generation fails. + IOError + If file operations fail or visualization cannot be generated. + RuntimeError + If network visualization generation fails. + """ + if not filename or not isinstance(filename, str): + raise ValueError("Filename must be a non-empty string") + + try: + # Initialize network + net = Network( + directed=True, + height="750px", + width="100%", + bgcolor=self.colors["bg"], + layout=None, + ) - # Add nodes to the network - add_nodes_to_network(net, self.flow, node_positions, self.node_styles) + # Set options to disable physics + net.set_options( + """ + var options = { + "nodes": { + "font": { + "multi": "html" + } + }, + "physics": { + "enabled": false + } + } + """ + ) - # Add edges to the network - add_edges(net, self.flow, node_positions, self.colors) + # Calculate levels for nodes + try: + node_levels = calculate_node_levels(self.flow) + except Exception as e: + raise ValueError(f"Failed to calculate node levels: {str(e)}") - network_html = net.generate_html() - final_html_content = self._generate_final_html(network_html) + # Compute positions + try: + node_positions = compute_positions(self.flow, node_levels) + except Exception as e: + raise ValueError(f"Failed to compute node positions: {str(e)}") - # Save the final HTML content to the file - with open(f"{filename}.html", "w", encoding="utf-8") as f: - f.write(final_html_content) - print(f"Plot saved as {filename}.html") + # Add nodes to the network + try: + add_nodes_to_network(net, self.flow, node_positions, self.node_styles) + except Exception as e: + raise RuntimeError(f"Failed to add nodes to network: {str(e)}") - self._cleanup_pyvis_lib() + # Add edges to the network + try: + add_edges(net, self.flow, node_positions, self.colors) + except Exception as e: + raise RuntimeError(f"Failed to add edges to network: {str(e)}") + + # Generate HTML + try: + network_html = net.generate_html() + final_html_content = self._generate_final_html(network_html) + except Exception as e: + raise RuntimeError(f"Failed to generate network visualization: {str(e)}") + + # Save the final HTML content to the file + try: + with open(f"{filename}.html", "w", encoding="utf-8") as f: + f.write(final_html_content) + print(f"Plot saved as {filename}.html") + except IOError as e: + raise IOError(f"Failed to save flow visualization to {filename}.html: {str(e)}") + + except (ValueError, RuntimeError, IOError) as e: + raise e + except Exception as e: + raise RuntimeError(f"Unexpected error during flow visualization: {str(e)}") + finally: + self._cleanup_pyvis_lib() def _generate_final_html(self, network_html): - # Extract just the body content from the generated HTML - current_dir = os.path.dirname(__file__) - template_path = os.path.join( - current_dir, "assets", "crewai_flow_visual_template.html" - ) - logo_path = os.path.join(current_dir, "assets", "crewai_logo.svg") + """ + Generate the final HTML content with network visualization and legend. - html_handler = HTMLTemplateHandler(template_path, logo_path) - network_body = html_handler.extract_body_content(network_html) + Parameters + ---------- + network_html : str + HTML content generated by pyvis Network. - # Generate the legend items HTML - legend_items = get_legend_items(self.colors) - legend_items_html = generate_legend_items_html(legend_items) - final_html_content = html_handler.generate_final_html( - network_body, legend_items_html - ) - return final_html_content + Returns + ------- + str + Complete HTML content with styling and legend. + + Raises + ------ + IOError + If template or logo files cannot be accessed. + ValueError + If network_html is invalid. + """ + if not network_html: + raise ValueError("Invalid network HTML content") + + try: + # Extract just the body content from the generated HTML + current_dir = os.path.dirname(__file__) + template_path = safe_path_join("assets", "crewai_flow_visual_template.html", root=current_dir) + logo_path = safe_path_join("assets", "crewai_logo.svg", root=current_dir) + + if not os.path.exists(template_path): + raise IOError(f"Template file not found: {template_path}") + if not os.path.exists(logo_path): + raise IOError(f"Logo file not found: {logo_path}") + + html_handler = HTMLTemplateHandler(template_path, logo_path) + network_body = html_handler.extract_body_content(network_html) + + # Generate the legend items HTML + legend_items = get_legend_items(self.colors) + legend_items_html = generate_legend_items_html(legend_items) + final_html_content = html_handler.generate_final_html( + network_body, legend_items_html + ) + return final_html_content + except Exception as e: + raise IOError(f"Failed to generate visualization HTML: {str(e)}") def _cleanup_pyvis_lib(self): - # Clean up the generated lib folder - lib_folder = os.path.join(os.getcwd(), "lib") + """ + Clean up the generated lib folder from pyvis. + + This method safely removes the temporary lib directory created by pyvis + during network visualization generation. + """ try: + lib_folder = safe_path_join("lib", root=os.getcwd()) if os.path.exists(lib_folder) and os.path.isdir(lib_folder): import shutil - shutil.rmtree(lib_folder) + except ValueError as e: + print(f"Error validating lib folder path: {e}") except Exception as e: - print(f"Error cleaning up {lib_folder}: {e}") + print(f"Error cleaning up lib folder: {e}") def plot_flow(flow, filename="flow_plot"): + """ + Convenience function to create and save a flow visualization. + + Parameters + ---------- + flow : Flow + Flow instance to visualize. + filename : str, optional + Output filename without extension, by default "flow_plot". + + Raises + ------ + ValueError + If flow object or filename is invalid. + IOError + If file operations fail. + """ visualizer = FlowPlot(flow) visualizer.plot(filename) diff --git a/src/crewai/flow/html_template_handler.py b/src/crewai/flow/html_template_handler.py index d521d8cf8..396af5546 100644 --- a/src/crewai/flow/html_template_handler.py +++ b/src/crewai/flow/html_template_handler.py @@ -1,11 +1,32 @@ import base64 import re +from pathlib import Path + +from crewai.flow.path_utils import safe_path_join, validate_path_exists class HTMLTemplateHandler: def __init__(self, template_path, logo_path): - self.template_path = template_path - self.logo_path = logo_path + """ + Initialize HTMLTemplateHandler with validated template and logo paths. + + Parameters + ---------- + template_path : str + Path to the HTML template file. + logo_path : str + Path to the logo image file. + + Raises + ------ + ValueError + If template or logo paths are invalid or files don't exist. + """ + try: + self.template_path = validate_path_exists(template_path, "file") + self.logo_path = validate_path_exists(logo_path, "file") + except ValueError as e: + raise ValueError(f"Invalid template or logo path: {e}") def read_template(self): with open(self.template_path, "r", encoding="utf-8") as f: diff --git a/src/crewai/flow/path_utils.py b/src/crewai/flow/path_utils.py new file mode 100644 index 000000000..09ae8cd3d --- /dev/null +++ b/src/crewai/flow/path_utils.py @@ -0,0 +1,135 @@ +""" +Path utilities for secure file operations in CrewAI flow module. + +This module provides utilities for secure path handling to prevent directory +traversal attacks and ensure paths remain within allowed boundaries. +""" + +import os +from pathlib import Path +from typing import List, Union + + +def safe_path_join(*parts: str, root: Union[str, Path, None] = None) -> str: + """ + Safely join path components and ensure the result is within allowed boundaries. + + Parameters + ---------- + *parts : str + Variable number of path components to join. + root : Union[str, Path, None], optional + Root directory to use as base. If None, uses current working directory. + + Returns + ------- + str + String representation of the resolved path. + + Raises + ------ + ValueError + If the resulting path would be outside the root directory + or if any path component is invalid. + """ + if not parts: + raise ValueError("No path components provided") + + try: + # Convert all parts to strings and clean them + clean_parts = [str(part).strip() for part in parts if part] + if not clean_parts: + raise ValueError("No valid path components provided") + + # Establish root directory + root_path = Path(root).resolve() if root else Path.cwd() + + # Join and resolve the full path + full_path = Path(root_path, *clean_parts).resolve() + + # Check if the resolved path is within root + if not str(full_path).startswith(str(root_path)): + raise ValueError( + f"Invalid path: Potential directory traversal. Path must be within {root_path}" + ) + + return str(full_path) + + except Exception as e: + if isinstance(e, ValueError): + raise + raise ValueError(f"Invalid path components: {str(e)}") + + +def validate_path_exists(path: Union[str, Path], file_type: str = "file") -> str: + """ + Validate that a path exists and is of the expected type. + + Parameters + ---------- + path : Union[str, Path] + Path to validate. + file_type : str, optional + Expected type ('file' or 'directory'), by default 'file'. + + Returns + ------- + str + Validated path as string. + + Raises + ------ + ValueError + If path doesn't exist or is not of expected type. + """ + try: + path_obj = Path(path).resolve() + + if not path_obj.exists(): + raise ValueError(f"Path does not exist: {path}") + + if file_type == "file" and not path_obj.is_file(): + raise ValueError(f"Path is not a file: {path}") + elif file_type == "directory" and not path_obj.is_dir(): + raise ValueError(f"Path is not a directory: {path}") + + return str(path_obj) + + except Exception as e: + if isinstance(e, ValueError): + raise + raise ValueError(f"Invalid path: {str(e)}") + + +def list_files(directory: Union[str, Path], pattern: str = "*") -> List[str]: + """ + Safely list files in a directory matching a pattern. + + Parameters + ---------- + directory : Union[str, Path] + Directory to search in. + pattern : str, optional + Glob pattern to match files against, by default "*". + + Returns + ------- + List[str] + List of matching file paths. + + Raises + ------ + ValueError + If directory is invalid or inaccessible. + """ + try: + dir_path = Path(directory).resolve() + if not dir_path.is_dir(): + raise ValueError(f"Not a directory: {directory}") + + return [str(p) for p in dir_path.glob(pattern) if p.is_file()] + + except Exception as e: + if isinstance(e, ValueError): + raise + raise ValueError(f"Error listing files: {str(e)}") diff --git a/src/crewai/flow/utils.py b/src/crewai/flow/utils.py index dc1f611fb..c0686222f 100644 --- a/src/crewai/flow/utils.py +++ b/src/crewai/flow/utils.py @@ -1,9 +1,25 @@ +""" +Utility functions for flow visualization and dependency analysis. + +This module provides core functionality for analyzing and manipulating flow structures, +including node level calculation, ancestor tracking, and return value analysis. +Functions in this module are primarily used by the visualization system to create +accurate and informative flow diagrams. + +Example +------- +>>> flow = Flow() +>>> node_levels = calculate_node_levels(flow) +>>> ancestors = build_ancestor_dict(flow) +""" + import ast import inspect import textwrap +from typing import Any, Dict, List, Optional, Set, Union -def get_possible_return_constants(function): +def get_possible_return_constants(function: Any) -> Optional[List[str]]: try: source = inspect.getsource(function) except OSError: @@ -77,11 +93,34 @@ def get_possible_return_constants(function): return list(return_values) if return_values else None -def calculate_node_levels(flow): - levels = {} - queue = [] - visited = set() - pending_and_listeners = {} +def calculate_node_levels(flow: Any) -> Dict[str, int]: + """ + Calculate the hierarchical level of each node in the flow. + + Performs a breadth-first traversal of the flow graph to assign levels + to nodes, starting with start methods at level 0. + + Parameters + ---------- + flow : Any + The flow instance containing methods, listeners, and router configurations. + + Returns + ------- + Dict[str, int] + Dictionary mapping method names to their hierarchical levels. + + Notes + ----- + - Start methods are assigned level 0 + - Each subsequent connected node is assigned level = parent_level + 1 + - Handles both OR and AND conditions for listeners + - Processes router paths separately + """ + levels: Dict[str, int] = {} + queue: List[str] = [] + visited: Set[str] = set() + pending_and_listeners: Dict[str, Set[str]] = {} # Make all start methods at level 0 for method_name, method in flow._methods.items(): @@ -140,7 +179,20 @@ def calculate_node_levels(flow): return levels -def count_outgoing_edges(flow): +def count_outgoing_edges(flow: Any) -> Dict[str, int]: + """ + Count the number of outgoing edges for each method in the flow. + + Parameters + ---------- + flow : Any + The flow instance to analyze. + + Returns + ------- + Dict[str, int] + Dictionary mapping method names to their outgoing edge count. + """ counts = {} for method_name in flow._methods: counts[method_name] = 0 @@ -152,16 +204,53 @@ def count_outgoing_edges(flow): return counts -def build_ancestor_dict(flow): - ancestors = {node: set() for node in flow._methods} - visited = set() +def build_ancestor_dict(flow: Any) -> Dict[str, Set[str]]: + """ + Build a dictionary mapping each node to its ancestor nodes. + + Parameters + ---------- + flow : Any + The flow instance to analyze. + + Returns + ------- + Dict[str, Set[str]] + Dictionary mapping each node to a set of its ancestor nodes. + """ + ancestors: Dict[str, Set[str]] = {node: set() for node in flow._methods} + visited: Set[str] = set() for node in flow._methods: if node not in visited: dfs_ancestors(node, ancestors, visited, flow) return ancestors -def dfs_ancestors(node, ancestors, visited, flow): +def dfs_ancestors( + node: str, + ancestors: Dict[str, Set[str]], + visited: Set[str], + flow: Any +) -> None: + """ + Perform depth-first search to build ancestor relationships. + + Parameters + ---------- + node : str + Current node being processed. + ancestors : Dict[str, Set[str]] + Dictionary tracking ancestor relationships. + visited : Set[str] + Set of already visited nodes. + flow : Any + The flow instance being analyzed. + + Notes + ----- + This function modifies the ancestors dictionary in-place to build + the complete ancestor graph. + """ if node in visited: return visited.add(node) @@ -185,12 +274,48 @@ def dfs_ancestors(node, ancestors, visited, flow): dfs_ancestors(listener_name, ancestors, visited, flow) -def is_ancestor(node, ancestor_candidate, ancestors): +def is_ancestor(node: str, ancestor_candidate: str, ancestors: Dict[str, Set[str]]) -> bool: + """ + Check if one node is an ancestor of another. + + Parameters + ---------- + node : str + The node to check ancestors for. + ancestor_candidate : str + The potential ancestor node. + ancestors : Dict[str, Set[str]] + Dictionary containing ancestor relationships. + + Returns + ------- + bool + True if ancestor_candidate is an ancestor of node, False otherwise. + """ return ancestor_candidate in ancestors.get(node, set()) -def build_parent_children_dict(flow): - parent_children = {} +def build_parent_children_dict(flow: Any) -> Dict[str, List[str]]: + """ + Build a dictionary mapping parent nodes to their children. + + Parameters + ---------- + flow : Any + The flow instance to analyze. + + Returns + ------- + Dict[str, List[str]] + Dictionary mapping parent method names to lists of their child method names. + + Notes + ----- + - Maps listeners to their trigger methods + - Maps router methods to their paths and listeners + - Children lists are sorted for consistent ordering + """ + parent_children: Dict[str, List[str]] = {} # Map listeners to their trigger methods for listener_name, (_, trigger_methods) in flow._listeners.items(): @@ -214,7 +339,24 @@ def build_parent_children_dict(flow): return parent_children -def get_child_index(parent, child, parent_children): +def get_child_index(parent: str, child: str, parent_children: Dict[str, List[str]]) -> int: + """ + Get the index of a child node in its parent's sorted children list. + + Parameters + ---------- + parent : str + The parent node name. + child : str + The child node name to find the index for. + parent_children : Dict[str, List[str]] + Dictionary mapping parents to their children lists. + + Returns + ------- + int + Zero-based index of the child in its parent's sorted children list. + """ children = parent_children.get(parent, []) children.sort() return children.index(child) diff --git a/src/crewai/flow/visualization_utils.py b/src/crewai/flow/visualization_utils.py index 321f63344..70f527f1a 100644 --- a/src/crewai/flow/visualization_utils.py +++ b/src/crewai/flow/visualization_utils.py @@ -1,5 +1,23 @@ +""" +Utilities for creating visual representations of flow structures. + +This module provides functions for generating network visualizations of flows, +including node placement, edge creation, and visual styling. It handles the +conversion of flow structures into visual network graphs with appropriate +styling and layout. + +Example +------- +>>> flow = Flow() +>>> net = Network(directed=True) +>>> node_positions = compute_positions(flow, node_levels) +>>> add_nodes_to_network(net, flow, node_positions, node_styles) +>>> add_edges(net, flow, node_positions, colors) +""" + import ast import inspect +from typing import Any, Dict, List, Optional, Tuple, Union from .utils import ( build_ancestor_dict, @@ -9,8 +27,25 @@ from .utils import ( ) -def method_calls_crew(method): - """Check if the method calls `.crew()`.""" +def method_calls_crew(method: Any) -> bool: + """ + Check if the method contains a call to `.crew()`. + + Parameters + ---------- + method : Any + The method to analyze for crew() calls. + + Returns + ------- + bool + True if the method calls .crew(), False otherwise. + + Notes + ----- + Uses AST analysis to detect method calls, specifically looking for + attribute access of 'crew'. + """ try: source = inspect.getsource(method) source = inspect.cleandoc(source) @@ -34,7 +69,34 @@ def method_calls_crew(method): return visitor.found -def add_nodes_to_network(net, flow, node_positions, node_styles): +def add_nodes_to_network( + net: Any, + flow: Any, + node_positions: Dict[str, Tuple[float, float]], + node_styles: Dict[str, Dict[str, Any]] +) -> None: + """ + Add nodes to the network visualization with appropriate styling. + + Parameters + ---------- + net : Any + The pyvis Network instance to add nodes to. + flow : Any + The flow instance containing method information. + node_positions : Dict[str, Tuple[float, float]] + Dictionary mapping node names to their (x, y) positions. + node_styles : Dict[str, Dict[str, Any]] + Dictionary containing style configurations for different node types. + + Notes + ----- + Node types include: + - Start methods + - Router methods + - Crew methods + - Regular methods + """ def human_friendly_label(method_name): return method_name.replace("_", " ").title() @@ -73,9 +135,33 @@ def add_nodes_to_network(net, flow, node_positions, node_styles): ) -def compute_positions(flow, node_levels, y_spacing=150, x_spacing=150): - level_nodes = {} - node_positions = {} +def compute_positions( + flow: Any, + node_levels: Dict[str, int], + y_spacing: float = 150, + x_spacing: float = 150 +) -> Dict[str, Tuple[float, float]]: + """ + Compute the (x, y) positions for each node in the flow graph. + + Parameters + ---------- + flow : Any + The flow instance to compute positions for. + node_levels : Dict[str, int] + Dictionary mapping node names to their hierarchical levels. + y_spacing : float, optional + Vertical spacing between levels, by default 150. + x_spacing : float, optional + Horizontal spacing between nodes, by default 150. + + Returns + ------- + Dict[str, Tuple[float, float]] + Dictionary mapping node names to their (x, y) coordinates. + """ + level_nodes: Dict[int, List[str]] = {} + node_positions: Dict[str, Tuple[float, float]] = {} for method_name, level in node_levels.items(): level_nodes.setdefault(level, []).append(method_name) @@ -90,7 +176,33 @@ def compute_positions(flow, node_levels, y_spacing=150, x_spacing=150): return node_positions -def add_edges(net, flow, node_positions, colors): +def add_edges( + net: Any, + flow: Any, + node_positions: Dict[str, Tuple[float, float]], + colors: Dict[str, str] +) -> None: + edge_smooth: Dict[str, Union[str, float]] = {"type": "continuous"} # Default value + """ + Add edges to the network visualization with appropriate styling. + + Parameters + ---------- + net : Any + The pyvis Network instance to add edges to. + flow : Any + The flow instance containing edge information. + node_positions : Dict[str, Tuple[float, float]] + Dictionary mapping node names to their positions. + colors : Dict[str, str] + Dictionary mapping edge types to their colors. + + Notes + ----- + - Handles both normal listener edges and router edges + - Applies appropriate styling (color, dashes) based on edge type + - Adds curvature to edges when needed (cycles or multiple children) + """ ancestors = build_ancestor_dict(flow) parent_children = build_parent_children_dict(flow) @@ -126,7 +238,7 @@ def add_edges(net, flow, node_positions, colors): else: edge_smooth = {"type": "cubicBezier"} else: - edge_smooth = False + edge_smooth.update({"type": "continuous"}) edge_style = { "color": edge_color, @@ -189,7 +301,7 @@ def add_edges(net, flow, node_positions, colors): else: edge_smooth = {"type": "cubicBezier"} else: - edge_smooth = False + edge_smooth.update({"type": "continuous"}) edge_style = { "color": colors["router_edge"], From a548463faebd22aa3167a13ad417b4ab89776478 Mon Sep 17 00:00:00 2001 From: Marco Vinciguerra <88108002+VinciGit00@users.noreply.github.com> Date: Tue, 31 Dec 2024 05:51:43 +0100 Subject: [PATCH 11/11] feat: add docstring (#1819) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: João Moura --- src/crewai/flow/flow.py | 3 --- src/crewai/flow/flow_visualizer.py | 2 ++ src/crewai/flow/html_template_handler.py | 7 +++++++ src/crewai/flow/legend_generator.py | 1 + src/crewai/flow/visualization_utils.py | 1 + 5 files changed, 11 insertions(+), 3 deletions(-) diff --git a/src/crewai/flow/flow.py b/src/crewai/flow/flow.py index 806d9ec84..dc46aa6d8 100644 --- a/src/crewai/flow/flow.py +++ b/src/crewai/flow/flow.py @@ -95,7 +95,6 @@ def start(condition: Optional[Union[str, dict, Callable]] = None) -> Callable: return decorator - def listen(condition: Union[str, dict, Callable]) -> Callable: """ Creates a listener that executes when specified conditions are met. @@ -198,7 +197,6 @@ def router(condition: Union[str, dict, Callable]) -> Callable: """ def decorator(func): func.__is_router__ = True - # Handle conditions like listen/start if isinstance(condition, str): func.__trigger_methods__ = [condition] func.__condition_type__ = "OR" @@ -220,7 +218,6 @@ def router(condition: Union[str, dict, Callable]) -> Callable: return decorator - def or_(*conditions: Union[str, dict, Callable]) -> dict: """ Combines multiple conditions with OR logic for flow control. diff --git a/src/crewai/flow/flow_visualizer.py b/src/crewai/flow/flow_visualizer.py index ceacee91f..a70e91a18 100644 --- a/src/crewai/flow/flow_visualizer.py +++ b/src/crewai/flow/flow_visualizer.py @@ -18,6 +18,8 @@ from crewai.flow.visualization_utils import ( class FlowPlot: + """Handles the creation and rendering of flow visualization diagrams.""" + def __init__(self, flow): """ Initialize FlowPlot with a flow object. diff --git a/src/crewai/flow/html_template_handler.py b/src/crewai/flow/html_template_handler.py index 396af5546..f0d2d89ad 100644 --- a/src/crewai/flow/html_template_handler.py +++ b/src/crewai/flow/html_template_handler.py @@ -6,6 +6,8 @@ from crewai.flow.path_utils import safe_path_join, validate_path_exists class HTMLTemplateHandler: + """Handles HTML template processing and generation for flow visualization diagrams.""" + def __init__(self, template_path, logo_path): """ Initialize HTMLTemplateHandler with validated template and logo paths. @@ -29,19 +31,23 @@ class HTMLTemplateHandler: raise ValueError(f"Invalid template or logo path: {e}") def read_template(self): + """Read and return the HTML template file contents.""" with open(self.template_path, "r", encoding="utf-8") as f: return f.read() def encode_logo(self): + """Convert the logo SVG file to base64 encoded string.""" with open(self.logo_path, "rb") as logo_file: logo_svg_data = logo_file.read() return base64.b64encode(logo_svg_data).decode("utf-8") def extract_body_content(self, html): + """Extract and return content between body tags from HTML string.""" match = re.search("(.*?)", html, re.DOTALL) return match.group(1) if match else "" def generate_legend_items_html(self, legend_items): + """Generate HTML markup for the legend items.""" legend_items_html = "" for item in legend_items: if "border" in item: @@ -69,6 +75,7 @@ class HTMLTemplateHandler: return legend_items_html def generate_final_html(self, network_body, legend_items_html, title="Flow Plot"): + """Combine all components into final HTML document with network visualization.""" html_template = self.read_template() logo_svg_base64 = self.encode_logo() diff --git a/src/crewai/flow/legend_generator.py b/src/crewai/flow/legend_generator.py index fb3d5cfd6..f250dec20 100644 --- a/src/crewai/flow/legend_generator.py +++ b/src/crewai/flow/legend_generator.py @@ -1,3 +1,4 @@ + def get_legend_items(colors): return [ {"label": "Start Method", "color": colors["start"]}, diff --git a/src/crewai/flow/visualization_utils.py b/src/crewai/flow/visualization_utils.py index 70f527f1a..781677276 100644 --- a/src/crewai/flow/visualization_utils.py +++ b/src/crewai/flow/visualization_utils.py @@ -55,6 +55,7 @@ def method_calls_crew(method: Any) -> bool: return False class CrewCallVisitor(ast.NodeVisitor): + """AST visitor to detect .crew() method calls.""" def __init__(self): self.found = False