mirror of
https://github.com/crewAIInc/crewAI.git
synced 2025-12-16 20:38:29 +00:00
Compare commits
44 Commits
conditiona
...
feat/testi
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
616ffe2aba | ||
|
|
a6bce1089a | ||
|
|
cb8fbf61de | ||
|
|
4d2cdc3d96 | ||
|
|
890c03a0a6 | ||
|
|
e4b419d5be | ||
|
|
8ffc4f79fa | ||
|
|
c05ef3c8cf | ||
|
|
cf600c1a43 | ||
|
|
2a88d1d462 | ||
|
|
660a2ae837 | ||
|
|
6930656897 | ||
|
|
349753a013 | ||
|
|
f53a3a00e1 | ||
|
|
e2113fe417 | ||
|
|
f9288295e6 | ||
|
|
fcc57f2fc0 | ||
|
|
5cb6ee9eeb | ||
|
|
b38f0825e7 | ||
|
|
f51e94dede | ||
|
|
47bf93d291 | ||
|
|
41fd1c6124 | ||
|
|
be1b9a3994 | ||
|
|
61a196394b | ||
|
|
5b442e4350 | ||
|
|
c9920b9823 | ||
|
|
2faa2dbddb | ||
|
|
76607062f0 | ||
|
|
a8cac9b7e9 | ||
|
|
dfacc8832f | ||
|
|
93f643f851 | ||
|
|
cbf5d548be | ||
|
|
6946b89e17 | ||
|
|
dc4911b1ca | ||
|
|
a7a2dfd296 | ||
|
|
7baaeacac3 | ||
|
|
021f2eb8a1 | ||
|
|
731de2ff31 | ||
|
|
24e28da203 | ||
|
|
0415b9982b | ||
|
|
ee32d36312 | ||
|
|
c66559345f | ||
|
|
3ad95d50d4 | ||
|
|
e8cbdb7881 |
@@ -32,6 +32,7 @@ A crew in crewAI represents a collaborative group of agents working together to
|
||||
| **Manager Agent** _(optional)_ | `manager_agent` | `manager` sets a custom agent that will be used as a manager. |
|
||||
| **Manager Callbacks** _(optional)_ | `manager_callbacks` | `manager_callbacks` takes a list of callback handlers to be executed by the manager agent when a hierarchical process is used. |
|
||||
| **Prompt File** _(optional)_ | `prompt_file` | Path to the prompt JSON file to be used for the crew. |
|
||||
| **Planning** *(optional)* | `planning` | Adds planning ability to the Crew. When activated before each Crew iteration, all Crew data is sent to an AgentPlanner that will plan the tasks and this plan will be added to each task description.
|
||||
|
||||
!!! note "Crew Max RPM"
|
||||
The `max_rpm` attribute sets the maximum number of requests per minute the crew can perform to avoid rate limits and will override individual agents' `max_rpm` settings if you set it.
|
||||
@@ -45,6 +46,12 @@ When assembling a crew, you combine agents with complementary roles and tools, a
|
||||
```python
|
||||
from crewai import Crew, Agent, Task, Process
|
||||
from langchain_community.tools import DuckDuckGoSearchRun
|
||||
from crewai_tools import tool
|
||||
|
||||
@tool('DuckDuckGoSearch')
|
||||
def search(search_query: str):
|
||||
"""Search the web for information on a given topic"""
|
||||
return DuckDuckGoSearchRun().run(search_query)
|
||||
|
||||
# Define agents with specific roles and tools
|
||||
researcher = Agent(
|
||||
@@ -55,7 +62,7 @@ researcher = Agent(
|
||||
to the business.
|
||||
You're currently working on a project to analyze the
|
||||
trends and innovations in the space of artificial intelligence.""",
|
||||
tools=[DuckDuckGoSearchRun()]
|
||||
tools=[search]
|
||||
)
|
||||
|
||||
writer = Agent(
|
||||
@@ -213,9 +220,9 @@ These methods provide flexibility in how you manage and execute tasks within you
|
||||
### Replaying from specific task:
|
||||
You can now replay from a specific task using our cli command replay.
|
||||
|
||||
The replay_from_tasks feature in CrewAI allows you to replay from a specific task using the command-line interface (CLI). By running the command `crewai replay -t <task_id>`, you can specify the `task_id` for the replay process.
|
||||
The replay feature in CrewAI allows you to replay from a specific task using the command-line interface (CLI). By running the command `crewai replay -t <task_id>`, you can specify the `task_id` for the replay process.
|
||||
|
||||
Kickoffs will now save the latest kickoffs returned task outputs locally for you to be able to replay from.
|
||||
Kickoffs will now save the latest kickoffs returned task outputs locally for you to be able to replay from.
|
||||
|
||||
|
||||
### Replaying from specific task Using the CLI
|
||||
@@ -236,4 +243,4 @@ crewai log-tasks-outputs
|
||||
crewai replay -t <task_id>
|
||||
```
|
||||
|
||||
These commands let you replay from your latest kickoff tasks, still retaining context from previously executed tasks.
|
||||
These commands let you replay from your latest kickoff tasks, still retaining context from previously executed tasks.
|
||||
|
||||
@@ -29,6 +29,11 @@ description: Leveraging memory systems in the crewAI framework to enhance agent
|
||||
When configuring a crew, you can enable and customize each memory component to suit the crew's objectives and the nature of tasks it will perform.
|
||||
By default, the memory system is disabled, and you can ensure it is active by setting `memory=True` in the crew configuration. The memory will use OpenAI Embeddings by default, but you can change it by setting `embedder` to a different model.
|
||||
|
||||
The 'embedder' only applies to **Short-Term Memory** which uses Chroma for RAG using EmbedChain package.
|
||||
The **Long-Term Memory** uses SQLLite3 to store task results. Currently, there is no way to override these storage implementations.
|
||||
The data storage files are saved into a platform specific location found using the appdirs package
|
||||
and the name of the project which can be overridden using the **CREWAI_STORAGE_DIR** environment variable.
|
||||
|
||||
### Example: Configuring Memory for a Crew
|
||||
|
||||
```python
|
||||
@@ -161,10 +166,43 @@ my_crew = Crew(
|
||||
)
|
||||
```
|
||||
|
||||
### Resetting Memory
|
||||
```sh
|
||||
crewai reset_memories [OPTIONS]
|
||||
```
|
||||
|
||||
#### Resetting Memory Options
|
||||
- **`-l, --long`**
|
||||
- **Description:** Reset LONG TERM memory.
|
||||
- **Type:** Flag (boolean)
|
||||
- **Default:** False
|
||||
|
||||
- **`-s, --short`**
|
||||
- **Description:** Reset SHORT TERM memory.
|
||||
- **Type:** Flag (boolean)
|
||||
- **Default:** False
|
||||
|
||||
- **`-e, --entities`**
|
||||
- **Description:** Reset ENTITIES memory.
|
||||
- **Type:** Flag (boolean)
|
||||
- **Default:** False
|
||||
|
||||
- **`-k, --kickoff-outputs`**
|
||||
- **Description:** Reset LATEST KICKOFF TASK OUTPUTS.
|
||||
- **Type:** Flag (boolean)
|
||||
- **Default:** False
|
||||
|
||||
- **`-a, --all`**
|
||||
- **Description:** Reset ALL memories.
|
||||
- **Type:** Flag (boolean)
|
||||
- **Default:** False
|
||||
|
||||
|
||||
|
||||
## Benefits of Using crewAI's Memory System
|
||||
- **Adaptive Learning:** Crews become more efficient over time, adapting to new information and refining their approach to tasks.
|
||||
- **Enhanced Personalization:** Memory enables agents to remember user preferences and historical interactions, leading to personalized experiences.
|
||||
- **Improved Problem Solving:** Access to a rich memory store aids agents in making more informed decisions, drawing on past learnings and contextual insights.
|
||||
|
||||
## Getting Started
|
||||
Integrating crewAI's memory system into your projects is straightforward. By leveraging the provided memory components and configurations, you can quickly empower your agents with the ability to remember, reason, and learn from their interactions, unlocking new levels of intelligence and capability.
|
||||
Integrating crewAI's memory system into your projects is straightforward. By leveraging the provided memory components and configurations, you can quickly empower your agents with the ability to remember, reason, and learn from their interactions, unlocking new levels of intelligence and capability.
|
||||
|
||||
119
docs/core-concepts/Planning.md
Normal file
119
docs/core-concepts/Planning.md
Normal file
@@ -0,0 +1,119 @@
|
||||
---
|
||||
title: crewAI Planning
|
||||
description: Learn how to add planning to your crewAI Crew and improve their performance.
|
||||
---
|
||||
|
||||
## Introduction
|
||||
The planning feature in CrewAI allows you to add planning capability to your crew. When enabled, before each Crew iteration, all Crew information is sent to an AgentPlanner that will plan the tasks step by step, and this plan will be added to each task description.
|
||||
|
||||
### Using the Planning Feature
|
||||
Getting started with the planning feature is very easy, the only step required is to add `planning=True` to your Crew:
|
||||
|
||||
```python
|
||||
from crewai import Crew, Agent, Task, Process
|
||||
|
||||
# Assemble your crew with planning capabilities
|
||||
my_crew = Crew(
|
||||
agents=self.agents,
|
||||
tasks=self.tasks,
|
||||
process=Process.sequential,
|
||||
planning=True,
|
||||
)
|
||||
```
|
||||
|
||||
From this point on, your crew will have planning enabled, and the tasks will be planned before each iteration.
|
||||
|
||||
### Example
|
||||
|
||||
When running the base case example, you will see something like the following output, which represents the output of the AgentPlanner responsible for creating the step-by-step logic to add to the Agents tasks.
|
||||
|
||||
```bash
|
||||
|
||||
[2024-07-15 16:49:11][INFO]: Planning the crew execution
|
||||
**Step-by-Step Plan for Task Execution**
|
||||
|
||||
**Task Number 1: Conduct a thorough research about AI LLMs**
|
||||
|
||||
**Agent:** AI LLMs Senior Data Researcher
|
||||
|
||||
**Agent Goal:** Uncover cutting-edge developments in AI LLMs
|
||||
|
||||
**Task Expected Output:** A list with 10 bullet points of the most relevant information about AI LLMs
|
||||
|
||||
**Task Tools:** None specified
|
||||
|
||||
**Agent Tools:** None specified
|
||||
|
||||
**Step-by-Step Plan:**
|
||||
|
||||
1. **Define Research Scope:**
|
||||
- Determine the specific areas of AI LLMs to focus on, such as advancements in architecture, use cases, ethical considerations, and performance metrics.
|
||||
|
||||
2. **Identify Reliable Sources:**
|
||||
- List reputable sources for AI research, including academic journals, industry reports, conferences (e.g., NeurIPS, ACL), AI research labs (e.g., OpenAI, Google AI), and online databases (e.g., IEEE Xplore, arXiv).
|
||||
|
||||
3. **Collect Data:**
|
||||
- Search for the latest papers, articles, and reports published in 2023 and early 2024.
|
||||
- Use keywords like "Large Language Models 2024", "AI LLM advancements", "AI ethics 2024", etc.
|
||||
|
||||
4. **Analyze Findings:**
|
||||
- Read and summarize the key points from each source.
|
||||
- Highlight new techniques, models, and applications introduced in the past year.
|
||||
|
||||
5. **Organize Information:**
|
||||
- Categorize the information into relevant topics (e.g., new architectures, ethical implications, real-world applications).
|
||||
- Ensure each bullet point is concise but informative.
|
||||
|
||||
6. **Create the List:**
|
||||
- Compile the 10 most relevant pieces of information into a bullet point list.
|
||||
- Review the list to ensure clarity and relevance.
|
||||
|
||||
**Expected Output:**
|
||||
A list with 10 bullet points of the most relevant information about AI LLMs.
|
||||
|
||||
---
|
||||
|
||||
**Task Number 2: Review the context you got and expand each topic into a full section for a report**
|
||||
|
||||
**Agent:** AI LLMs Reporting Analyst
|
||||
|
||||
**Agent Goal:** Create detailed reports based on AI LLMs data analysis and research findings
|
||||
|
||||
**Task Expected Output:** A fully fledge report with the main topics, each with a full section of information. Formatted as markdown without '```'
|
||||
|
||||
**Task Tools:** None specified
|
||||
|
||||
**Agent Tools:** None specified
|
||||
|
||||
**Step-by-Step Plan:**
|
||||
|
||||
1. **Review the Bullet Points:**
|
||||
- Carefully read through the list of 10 bullet points provided by the AI LLMs Senior Data Researcher.
|
||||
|
||||
2. **Outline the Report:**
|
||||
- Create an outline with each bullet point as a main section heading.
|
||||
- Plan sub-sections under each main heading to cover different aspects of the topic.
|
||||
|
||||
3. **Research Further Details:**
|
||||
- For each bullet point, conduct additional research if necessary to gather more detailed information.
|
||||
- Look for case studies, examples, and statistical data to support each section.
|
||||
|
||||
4. **Write Detailed Sections:**
|
||||
- Expand each bullet point into a comprehensive section.
|
||||
- Ensure each section includes an introduction, detailed explanation, examples, and a conclusion.
|
||||
- Use markdown formatting for headings, subheadings, lists, and emphasis.
|
||||
|
||||
5. **Review and Edit:**
|
||||
- Proofread the report for clarity, coherence, and correctness.
|
||||
- Make sure the report flows logically from one section to the next.
|
||||
- Format the report according to markdown standards.
|
||||
|
||||
6. **Finalize the Report:**
|
||||
- Ensure the report is complete with all sections expanded and detailed.
|
||||
- Double-check formatting and make any necessary adjustments.
|
||||
|
||||
**Expected Output:**
|
||||
A fully-fledged report with the main topics, each with a full section of information. Formatted as markdown without '```'.
|
||||
|
||||
---
|
||||
```
|
||||
41
docs/core-concepts/Testing.md
Normal file
41
docs/core-concepts/Testing.md
Normal file
@@ -0,0 +1,41 @@
|
||||
---
|
||||
title: crewAI Testing
|
||||
description: Learn how to test your crewAI Crew and evaluate their performance.
|
||||
---
|
||||
|
||||
## Introduction
|
||||
|
||||
Testing is a crucial part of the development process, and it is essential to ensure that your crew is performing as expected. And with crewAI, you can easily test your crew and evaluate its performance using the built-in testing capabilities.
|
||||
|
||||
### Using the Testing Feature
|
||||
|
||||
We added the CLI command `crewai test` to make it easy to test your crew. This command will run your crew for a specified number of iterations and provide detailed performance metrics.
|
||||
The parameters are `n_iterations` and `model` which are optional and default to 2 and `gpt-4o-mini` respectively. For now the only provider available is OpenAI.
|
||||
|
||||
```bash
|
||||
crewai test
|
||||
```
|
||||
|
||||
If you want to run more iterations or use a different model, you can specify the parameters like this:
|
||||
|
||||
```bash
|
||||
crewai test --n_iterations 5 --model gpt-4o
|
||||
```
|
||||
|
||||
What happens when you run the `crewai test` command is that the crew will be executed for the specified number of iterations, and the performance metrics will be displayed at the end of the run.
|
||||
|
||||
A table of scores at the end will show the performance of the crew in terms of the following metrics:
|
||||
```
|
||||
Task Scores
|
||||
(1-10 Higher is better)
|
||||
┏━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━┓
|
||||
┃ Tasks/Crew ┃ Run 1 ┃ Run 2 ┃ Avg. Total ┃
|
||||
┡━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━┩
|
||||
│ Task 1 │ 10.0 │ 9.0 │ 9.5 │
|
||||
│ Task 2 │ 9.0 │ 9.0 │ 9.0 │
|
||||
│ Crew │ 9.5 │ 9.0 │ 9.2 │
|
||||
└────────────┴───────┴───────┴────────────┘
|
||||
```
|
||||
|
||||
The example above shows the test results for two runs of the crew with two tasks, with the average total score for each task and the crew as a whole.
|
||||
|
||||
@@ -100,16 +100,24 @@ Here is a list of the available tools and their descriptions:
|
||||
|
||||
| Tool | Description |
|
||||
| :-------------------------- | :-------------------------------------------------------------------------------------------- |
|
||||
| **BrowserbaseLoadTool** | A tool for interacting with and extracting data from web browsers. |
|
||||
| **CodeDocsSearchTool** | A RAG tool optimized for searching through code documentation and related technical documents. |
|
||||
| **CodeInterpreterTool** | A tool for interpreting python code. |
|
||||
| **ComposioTool** | Enables use of Composio tools. |
|
||||
| **CSVSearchTool** | A RAG tool designed for searching within CSV files, tailored to handle structured data. |
|
||||
| **DirectorySearchTool** | A RAG tool for searching within directories, useful for navigating through file systems. |
|
||||
| **DOCXSearchTool** | A RAG tool aimed at searching within DOCX documents, ideal for processing Word files. |
|
||||
| **DirectoryReadTool** | Facilitates reading and processing of directory structures and their contents. |
|
||||
| **EXASearchTool** | A tool designed for performing exhaustive searches across various data sources. |
|
||||
| **FileReadTool** | Enables reading and extracting data from files, supporting various file formats. |
|
||||
| **FirecrawlSearchTool** | A tool to search webpages using Firecrawl and return the results. |
|
||||
| **FirecrawlCrawlWebsiteTool** | A tool for crawling webpages using Firecrawl. |
|
||||
| **FirecrawlScrapeWebsiteTool** | A tool for scraping webpages url using Firecrawl and returning its contents. |
|
||||
| **GithubSearchTool** | A RAG tool for searching within GitHub repositories, useful for code and documentation search.|
|
||||
| **SerperDevTool** | A specialized tool for development purposes, with specific functionalities under development. |
|
||||
| **TXTSearchTool** | A RAG tool focused on searching within text (.txt) files, suitable for unstructured data. |
|
||||
| **JSONSearchTool** | A RAG tool designed for searching within JSON files, catering to structured data handling. |
|
||||
| **LlamaIndexTool** | Enables the use of LlamaIndex tools. |
|
||||
| **MDXSearchTool** | A RAG tool tailored for searching within Markdown (MDX) files, useful for documentation. |
|
||||
| **PDFSearchTool** | A RAG tool aimed at searching within PDF documents, ideal for processing scanned documents. |
|
||||
| **PGSearchTool** | A RAG tool optimized for searching within PostgreSQL databases, suitable for database queries. |
|
||||
@@ -120,8 +128,6 @@ Here is a list of the available tools and their descriptions:
|
||||
| **XMLSearchTool** | A RAG tool designed for searching within XML files, suitable for structured data formats. |
|
||||
| **YoutubeChannelSearchTool**| A RAG tool for searching within YouTube channels, useful for video content analysis. |
|
||||
| **YoutubeVideoSearchTool** | A RAG tool aimed at searching within YouTube videos, ideal for video data extraction. |
|
||||
| **BrowserbaseTool** | A tool for interacting with and extracting data from web browsers. |
|
||||
| **ExaSearchTool** | A tool designed for performing exhaustive searches across various data sources. |
|
||||
|
||||
## Creating your own Tools
|
||||
|
||||
|
||||
@@ -21,14 +21,16 @@ Define your agents with distinct roles, backstories, and enhanced capabilities.
|
||||
import os
|
||||
from langchain.llms import OpenAI
|
||||
from crewai import Agent
|
||||
from crewai_tools import SerperDevTool, BrowserbaseTool, ExaSearchTool
|
||||
from crewai_tools import SerperDevTool, BrowserbaseLoadTool, EXASearchTool
|
||||
|
||||
os.environ["OPENAI_API_KEY"] = "Your OpenAI Key"
|
||||
os.environ["SERPER_API_KEY"] = "Your Serper Key"
|
||||
os.environ["BROWSERBASE_API_KEY"] = "Your BrowserBase Key"
|
||||
os.environ["BROWSERBASE_PROJECT_ID"] = "Your BrowserBase Project Id"
|
||||
|
||||
search_tool = SerperDevTool()
|
||||
browser_tool = BrowserbaseTool()
|
||||
exa_search_tool = ExaSearchTool()
|
||||
browser_tool = BrowserbaseLoadTool()
|
||||
exa_search_tool = EXASearchTool()
|
||||
|
||||
# Creating a senior researcher agent with advanced configurations
|
||||
researcher = Agent(
|
||||
|
||||
@@ -36,14 +36,14 @@ To replay from a task programmatically, use the following steps:
|
||||
2. Execute the replay command within a try-except block to handle potential errors.
|
||||
|
||||
```python
|
||||
def replay_from_task():
|
||||
def replay():
|
||||
"""
|
||||
Replay the crew execution from a specific task.
|
||||
"""
|
||||
task_id = '<task_id>'
|
||||
inputs = {"topic": "CrewAI Training"} # this is optional, you can pass in the inputs you want to replay otherwise uses the previous kickoffs inputs
|
||||
try:
|
||||
YourCrewName_Crew().crew().replay_from_task(task_id=task_id, inputs=inputs)
|
||||
YourCrewName_Crew().crew().replay(task_id=task_id, inputs=inputs)
|
||||
|
||||
except Exception as e:
|
||||
raise Exception(f"An error occurred while replaying the crew: {e}")
|
||||
@@ -9,7 +9,7 @@ Welcome to the ultimate guide for starting a new CrewAI project. This document w
|
||||
|
||||
## Prerequisites
|
||||
|
||||
We assume you have already installed CrewAI. If not, please refer to the [installation guide](how-to/Installing-CrewAI.md) to install CrewAI and its dependencies.
|
||||
We assume you have already installed CrewAI. If not, please refer to the [installation guide](https://docs.crewai.com/how-to/Installing-CrewAI/) to install CrewAI and its dependencies.
|
||||
|
||||
## Creating a New Project
|
||||
|
||||
@@ -134,4 +134,4 @@ This will initialize your crew of AI agents and begin task execution as defined
|
||||
|
||||
## Deploying Your Project
|
||||
|
||||
The easiest way to deploy your crew is through [CrewAI+](https://www.crewai.com/crewaiplus), where you can deploy your crew in a few clicks.
|
||||
The easiest way to deploy your crew is through [CrewAI+](https://www.crewai.com/crewaiplus), where you can deploy your crew in a few clicks.
|
||||
|
||||
@@ -43,6 +43,11 @@ Cutting-edge framework for orchestrating role-playing, autonomous AI agents. By
|
||||
Memory
|
||||
</a>
|
||||
</li>
|
||||
<li>
|
||||
<a href="./core-concepts/Planning">
|
||||
Planning
|
||||
</a>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
<div style="width:30%">
|
||||
|
||||
@@ -128,6 +128,8 @@ nav:
|
||||
- Collaboration: 'core-concepts/Collaboration.md'
|
||||
- Training: 'core-concepts/Training-Crew.md'
|
||||
- Memory: 'core-concepts/Memory.md'
|
||||
- Planning: 'core-concepts/Planning.md'
|
||||
- Testing: 'core-concepts/Testing.md'
|
||||
- Using LangChain Tools: 'core-concepts/Using-LangChain-Tools.md'
|
||||
- Using LlamaIndex Tools: 'core-concepts/Using-LlamaIndex-Tools.md'
|
||||
- How to Guides:
|
||||
|
||||
1201
poetry.lock
generated
1201
poetry.lock
generated
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
[tool.poetry]
|
||||
name = "crewai"
|
||||
version = "0.36.0"
|
||||
version = "0.41.1"
|
||||
description = "Cutting-edge framework for orchestrating role-playing, autonomous AI agents. By fostering collaborative intelligence, CrewAI empowers agents to work together seamlessly, tackling complex tasks."
|
||||
authors = ["Joao Moura <joao@crewai.com>"]
|
||||
readme = "README.md"
|
||||
@@ -21,12 +21,12 @@ opentelemetry-sdk = "^1.22.0"
|
||||
opentelemetry-exporter-otlp-proto-http = "^1.22.0"
|
||||
instructor = "1.3.3"
|
||||
regex = "^2023.12.25"
|
||||
crewai-tools = { version = "^0.4.8", optional = true }
|
||||
crewai-tools = { version = "^0.4.26", optional = true }
|
||||
click = "^8.1.7"
|
||||
python-dotenv = "^1.0.0"
|
||||
appdirs = "^1.4.4"
|
||||
jsonref = "^1.1.0"
|
||||
agentops = { version = "^0.1.9", optional = true }
|
||||
agentops = { version = "^0.3.0", optional = true }
|
||||
embedchain = "^0.1.114"
|
||||
json-repair = "^0.25.2"
|
||||
|
||||
@@ -46,7 +46,7 @@ mkdocs-material = { extras = ["imaging"], version = "^9.5.7" }
|
||||
mkdocs-material-extensions = "^1.3.1"
|
||||
pillow = "^10.2.0"
|
||||
cairosvg = "^2.7.1"
|
||||
crewai-tools = "^0.4.8"
|
||||
crewai-tools = "^0.4.26"
|
||||
|
||||
[tool.poetry.group.test.dependencies]
|
||||
pytest = "^8.0.0"
|
||||
|
||||
@@ -242,6 +242,8 @@ class CrewAgentExecutor(AgentExecutor, CrewAgentExecutorMixin):
|
||||
else:
|
||||
if tool_calling.tool_name.casefold().strip() in [
|
||||
name.casefold().strip() for name in name_to_tool_map
|
||||
] or tool_calling.tool_name.casefold().replace("_", " ") in [
|
||||
name.casefold().strip() for name in name_to_tool_map
|
||||
]:
|
||||
observation = tool_usage.use(tool_calling, agent_action.log)
|
||||
else:
|
||||
|
||||
@@ -5,10 +5,11 @@ from crewai.memory.storage.kickoff_task_outputs_storage import (
|
||||
KickoffTaskOutputsSQLiteStorage,
|
||||
)
|
||||
|
||||
|
||||
from .create_crew import create_crew
|
||||
from .train_crew import train_crew
|
||||
from .replay_from_task import replay_task_command
|
||||
from .reset_memories_command import reset_memories_command
|
||||
from .test_crew import test_crew
|
||||
from .train_crew import train_crew
|
||||
|
||||
|
||||
@click.group()
|
||||
@@ -99,5 +100,52 @@ def log_tasks_outputs() -> None:
|
||||
click.echo(f"An error occurred while logging task outputs: {e}", err=True)
|
||||
|
||||
|
||||
@crewai.command()
|
||||
@click.option("-l", "--long", is_flag=True, help="Reset LONG TERM memory")
|
||||
@click.option("-s", "--short", is_flag=True, help="Reset SHORT TERM memory")
|
||||
@click.option("-e", "--entities", is_flag=True, help="Reset ENTITIES memory")
|
||||
@click.option(
|
||||
"-k",
|
||||
"--kickoff-outputs",
|
||||
is_flag=True,
|
||||
help="Reset LATEST KICKOFF TASK OUTPUTS",
|
||||
)
|
||||
@click.option("-a", "--all", is_flag=True, help="Reset ALL memories")
|
||||
def reset_memories(long, short, entities, kickoff_outputs, all):
|
||||
"""
|
||||
Reset the crew memories (long, short, entity, latest_crew_kickoff_ouputs). This will delete all the data saved.
|
||||
"""
|
||||
try:
|
||||
if not all and not (long or short or entities or kickoff_outputs):
|
||||
click.echo(
|
||||
"Please specify at least one memory type to reset using the appropriate flags."
|
||||
)
|
||||
return
|
||||
reset_memories_command(long, short, entities, kickoff_outputs, all)
|
||||
except Exception as e:
|
||||
click.echo(f"An error occurred while resetting memories: {e}", err=True)
|
||||
|
||||
|
||||
@crewai.command()
|
||||
@click.option(
|
||||
"-n",
|
||||
"--n_iterations",
|
||||
type=int,
|
||||
default=3,
|
||||
help="Number of iterations to Test the crew",
|
||||
)
|
||||
@click.option(
|
||||
"-m",
|
||||
"--model",
|
||||
type=str,
|
||||
default="gpt-4o-mini",
|
||||
help="LLM Model to run the tests on the Crew. For now only accepting only OpenAI models.",
|
||||
)
|
||||
def test(n_iterations: int, model: str):
|
||||
"""Test the crew and evaluate the results."""
|
||||
click.echo(f"Testing the crew for {n_iterations} iterations with model {model}")
|
||||
test_crew(n_iterations, model)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
crewai()
|
||||
|
||||
45
src/crewai/cli/reset_memories_command.py
Normal file
45
src/crewai/cli/reset_memories_command.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import subprocess
|
||||
import click
|
||||
|
||||
from crewai.memory.entity.entity_memory import EntityMemory
|
||||
from crewai.memory.long_term.long_term_memory import LongTermMemory
|
||||
from crewai.memory.short_term.short_term_memory import ShortTermMemory
|
||||
from crewai.utilities.task_output_storage_handler import TaskOutputStorageHandler
|
||||
|
||||
|
||||
def reset_memories_command(long, short, entity, kickoff_outputs, all) -> None:
|
||||
"""
|
||||
Replay the crew execution from a specific task.
|
||||
|
||||
Args:
|
||||
task_id (str): The ID of the task to replay from.
|
||||
"""
|
||||
|
||||
try:
|
||||
if all:
|
||||
ShortTermMemory().reset()
|
||||
EntityMemory().reset()
|
||||
LongTermMemory().reset()
|
||||
TaskOutputStorageHandler().reset()
|
||||
click.echo("All memories have been reset.")
|
||||
else:
|
||||
if long:
|
||||
LongTermMemory().reset()
|
||||
click.echo("Long term memory has been reset.")
|
||||
|
||||
if short:
|
||||
ShortTermMemory().reset()
|
||||
click.echo("Short term memory has been reset.")
|
||||
if entity:
|
||||
EntityMemory().reset()
|
||||
click.echo("Entity memory has been reset.")
|
||||
if kickoff_outputs:
|
||||
TaskOutputStorageHandler().reset()
|
||||
click.echo("Latest Kickoff outputs stored has been reset.")
|
||||
|
||||
except subprocess.CalledProcessError as e:
|
||||
click.echo(f"An error occurred while resetting the memories: {e}", err=True)
|
||||
click.echo(e.output, err=True)
|
||||
|
||||
except Exception as e:
|
||||
click.echo(f"An unexpected error occurred: {e}", err=True)
|
||||
@@ -2,9 +2,15 @@
|
||||
import sys
|
||||
from {{folder_name}}.crew import {{crew_name}}Crew
|
||||
|
||||
# This main file is intended to be a way for your to run your
|
||||
# crew locally, so refrain from adding necessary logic into this file.
|
||||
# Replace with inputs you want to test with, it will automatically
|
||||
# interpolate any tasks and agents information
|
||||
|
||||
def run():
|
||||
# Replace with your inputs, it will automatically interpolate any tasks and agents information
|
||||
"""
|
||||
Run the crew.
|
||||
"""
|
||||
inputs = {
|
||||
'topic': 'AI LLMs'
|
||||
}
|
||||
@@ -15,19 +21,34 @@ def train():
|
||||
"""
|
||||
Train the crew for a given number of iterations.
|
||||
"""
|
||||
inputs = {"topic": "AI LLMs"}
|
||||
inputs = {
|
||||
"topic": "AI LLMs"
|
||||
}
|
||||
try:
|
||||
{{crew_name}}Crew().crew().train(n_iterations=int(sys.argv[1]), inputs=inputs)
|
||||
|
||||
except Exception as e:
|
||||
raise Exception(f"An error occurred while training the crew: {e}")
|
||||
|
||||
def replay_from_task():
|
||||
def replay():
|
||||
"""
|
||||
Replay the crew execution from a specific task.
|
||||
"""
|
||||
try:
|
||||
{{crew_name}}Crew().crew().replay_from_task(task_id=sys.argv[1])
|
||||
{{crew_name}}Crew().crew().replay(task_id=sys.argv[1])
|
||||
|
||||
except Exception as e:
|
||||
raise Exception(f"An error occurred while replaying the crew: {e}")
|
||||
|
||||
def test():
|
||||
"""
|
||||
Test the crew execution and returns the results.
|
||||
"""
|
||||
inputs = {
|
||||
"topic": "AI LLMs"
|
||||
}
|
||||
try:
|
||||
{{crew_name}}Crew().crew().test(n_iterations=int(sys.argv[1]), openai_model_name=sys.argv[2], inputs=inputs)
|
||||
|
||||
except Exception as e:
|
||||
raise Exception(f"An error occurred while replaying the crew: {e}")
|
||||
|
||||
@@ -6,12 +6,13 @@ authors = ["Your Name <you@example.com>"]
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = ">=3.10,<=3.13"
|
||||
crewai = { extras = ["tools"], version = "^0.35.8" }
|
||||
crewai = { extras = ["tools"], version = "^0.41.1" }
|
||||
|
||||
[tool.poetry.scripts]
|
||||
{{folder_name}} = "{{folder_name}}.main:run"
|
||||
train = "{{folder_name}}.main:train"
|
||||
replay = "{{folder_name}}.main:replay_from_task"
|
||||
replay = "{{folder_name}}.main:replay"
|
||||
test = "{{folder_name}}.main:test"
|
||||
|
||||
[build-system]
|
||||
requires = ["poetry-core"]
|
||||
|
||||
30
src/crewai/cli/test_crew.py
Normal file
30
src/crewai/cli/test_crew.py
Normal file
@@ -0,0 +1,30 @@
|
||||
import subprocess
|
||||
|
||||
import click
|
||||
|
||||
|
||||
def test_crew(n_iterations: int, model: str) -> None:
|
||||
"""
|
||||
Test the crew by running a command in the Poetry environment.
|
||||
|
||||
Args:
|
||||
n_iterations (int): The number of iterations to test the crew.
|
||||
model (str): The model to test the crew with.
|
||||
"""
|
||||
command = ["poetry", "run", "test", str(n_iterations), model]
|
||||
|
||||
try:
|
||||
if n_iterations <= 0:
|
||||
raise ValueError("The number of iterations must be a positive integer.")
|
||||
|
||||
result = subprocess.run(command, capture_output=False, text=True, check=True)
|
||||
|
||||
if result.stderr:
|
||||
click.echo(result.stderr, err=True)
|
||||
|
||||
except subprocess.CalledProcessError as e:
|
||||
click.echo(f"An error occurred while testing the crew: {e}", err=True)
|
||||
click.echo(e.output, err=True)
|
||||
|
||||
except Exception as e:
|
||||
click.echo(f"An unexpected error occurred: {e}", err=True)
|
||||
@@ -37,11 +37,13 @@ from crewai.utilities.constants import (
|
||||
TRAINED_AGENTS_DATA_FILE,
|
||||
TRAINING_DATA_FILE,
|
||||
)
|
||||
from crewai.utilities.evaluators.crew_evaluator_handler import CrewEvaluator
|
||||
from crewai.utilities.evaluators.task_evaluator import TaskEvaluator
|
||||
from crewai.utilities.formatter import (
|
||||
aggregate_raw_outputs_from_task_outputs,
|
||||
aggregate_raw_outputs_from_tasks,
|
||||
)
|
||||
from crewai.utilities.planning_handler import CrewPlanner
|
||||
from crewai.utilities.task_output_storage_handler import TaskOutputStorageHandler
|
||||
from crewai.utilities.training_handler import CrewTrainingHandler
|
||||
|
||||
@@ -73,6 +75,7 @@ class Crew(BaseModel):
|
||||
task_callback: Callback to be executed after each task for every agents execution.
|
||||
step_callback: Callback to be executed after each step for every agents execution.
|
||||
share_crew: Whether you want to share the complete crew information and execution with crewAI to make the library better, and allow us to train models.
|
||||
planning: Plan the crew execution and add the plan to the crew.
|
||||
"""
|
||||
|
||||
__hash__ = object.__hash__ # type: ignore
|
||||
@@ -148,6 +151,10 @@ class Crew(BaseModel):
|
||||
default=False,
|
||||
description="output_log_file",
|
||||
)
|
||||
planning: Optional[bool] = Field(
|
||||
default=False,
|
||||
description="Plan the crew execution and add the plan to the crew.",
|
||||
)
|
||||
task_execution_output_json_files: Optional[List[str]] = Field(
|
||||
default=None,
|
||||
description="List of file paths for task execution JSON files.",
|
||||
@@ -453,6 +460,9 @@ class Crew(BaseModel):
|
||||
|
||||
agent.create_agent_executor()
|
||||
|
||||
if self.planning:
|
||||
self._handle_crew_planning()
|
||||
|
||||
metrics = []
|
||||
|
||||
if self.process == Process.sequential:
|
||||
@@ -547,6 +557,19 @@ class Crew(BaseModel):
|
||||
self._task_output_handler.reset()
|
||||
return results
|
||||
|
||||
def _handle_crew_planning(self):
|
||||
"""Handles the Crew planning."""
|
||||
self._logger.log("info", "Planning the crew execution")
|
||||
result = CrewPlanner(self.tasks)._handle_crew_planning()
|
||||
|
||||
if result is not None and hasattr(result, "list_of_plans_per_task"):
|
||||
for task, step_plan in zip(self.tasks, result.list_of_plans_per_task):
|
||||
task.description += step_plan
|
||||
else:
|
||||
self._logger.log(
|
||||
"info", "Something went wrong with the planning process of the Crew"
|
||||
)
|
||||
|
||||
def _store_execution_log(
|
||||
self,
|
||||
task: Task,
|
||||
@@ -635,16 +658,14 @@ class Crew(BaseModel):
|
||||
last_sync_output = task.output
|
||||
continue
|
||||
|
||||
self._prepare_task(task, manager)
|
||||
if self.process == Process.hierarchical:
|
||||
agent_to_use = manager
|
||||
else:
|
||||
agent_to_use = task.agent
|
||||
agent_to_use = self._get_agent_to_use(task, manager)
|
||||
if agent_to_use is None:
|
||||
raise ValueError(
|
||||
f"No agent available for task: {task.description}. Ensure that either the task has an assigned agent or a manager agent is provided."
|
||||
)
|
||||
self._log_task_start(task, agent_to_use)
|
||||
|
||||
self._prepare_agent_tools(task, manager)
|
||||
self._log_task_start(task, agent_to_use.role)
|
||||
|
||||
if isinstance(task, ConditionalTask):
|
||||
skipped_task_output = self._handle_conditional_task(
|
||||
@@ -709,12 +730,22 @@ class Crew(BaseModel):
|
||||
return skipped_task_output
|
||||
return None
|
||||
|
||||
def _prepare_task(self, task: Task, manager: Optional[BaseAgent]):
|
||||
def _prepare_agent_tools(self, task: Task, manager: Optional[BaseAgent]):
|
||||
if self.process == Process.hierarchical:
|
||||
self._update_manager_tools(task, manager)
|
||||
if manager:
|
||||
self._update_manager_tools(task, manager)
|
||||
else:
|
||||
raise ValueError("Manager agent is required for hierarchical process.")
|
||||
elif task.agent and task.agent.allow_delegation:
|
||||
self._add_delegation_tools(task)
|
||||
|
||||
def _get_agent_to_use(
|
||||
self, task: Task, manager: Optional[BaseAgent]
|
||||
) -> Optional[BaseAgent]:
|
||||
if self.process == Process.hierarchical:
|
||||
return manager
|
||||
return task.agent
|
||||
|
||||
def _add_delegation_tools(self, task: Task):
|
||||
agents_for_delegation = [agent for agent in self.agents if agent != task.agent]
|
||||
if len(self.agents) > 1 and len(agents_for_delegation) > 0 and task.agent:
|
||||
@@ -741,18 +772,17 @@ class Crew(BaseModel):
|
||||
# Add the new tool
|
||||
task.tools.append(new_tool)
|
||||
|
||||
def _log_task_start(self, task: Task, agent: Optional[BaseAgent]):
|
||||
def _log_task_start(self, task: Task, role: str = "None"):
|
||||
color = self._logging_color
|
||||
role = agent.role if agent else "None"
|
||||
self._logger.log("debug", f"== Working Agent: {role}", color=color)
|
||||
self._logger.log("info", f"== Starting Task: {task.description}", color=color)
|
||||
if self.output_log_file:
|
||||
self._file_handler.log(agent=role, task=task.description, status="started")
|
||||
|
||||
def _update_manager_tools(self, task: Task, manager: Optional[BaseAgent]):
|
||||
if task.agent and manager:
|
||||
def _update_manager_tools(self, task: Task, manager: BaseAgent):
|
||||
if task.agent:
|
||||
manager.tools = task.agent.get_delegation_tools([task.agent])
|
||||
if manager:
|
||||
else:
|
||||
manager.tools = manager.get_delegation_tools(self.agents)
|
||||
|
||||
def _get_context(self, task: Task, task_outputs: List[TaskOutput]):
|
||||
@@ -792,7 +822,7 @@ class Crew(BaseModel):
|
||||
futures: List[Tuple[Task, Future[TaskOutput], int]],
|
||||
was_replayed: bool = False,
|
||||
) -> List[TaskOutput]:
|
||||
task_outputs = []
|
||||
task_outputs: List[TaskOutput] = []
|
||||
for future_task, future, task_index in futures:
|
||||
task_output = future.result()
|
||||
task_outputs.append(task_output)
|
||||
@@ -814,7 +844,7 @@ class Crew(BaseModel):
|
||||
None,
|
||||
)
|
||||
|
||||
def replay_from_task(
|
||||
def replay(
|
||||
self, task_id: str, inputs: Optional[Dict[str, Any]] = None
|
||||
) -> CrewOutput:
|
||||
stored_outputs = self._task_output_handler.load()
|
||||
@@ -937,5 +967,20 @@ class Crew(BaseModel):
|
||||
|
||||
return total_usage_metrics
|
||||
|
||||
def test(
|
||||
self,
|
||||
n_iterations: int,
|
||||
openai_model_name: str,
|
||||
inputs: Optional[Dict[str, Any]] = None,
|
||||
) -> None:
|
||||
"""Test and evaluate the Crew with the given inputs for n iterations."""
|
||||
evaluator = CrewEvaluator(self, openai_model_name)
|
||||
|
||||
for i in range(1, n_iterations + 1):
|
||||
evaluator.set_iteration(i)
|
||||
self.kickoff(inputs=inputs)
|
||||
|
||||
evaluator.print_crew_evaluation_result()
|
||||
|
||||
def __repr__(self):
|
||||
return f"Crew(id={self.id}, process={self.process}, number_of_agents={len(self.agents)}, number_of_tasks={len(self.tasks)})"
|
||||
|
||||
@@ -23,3 +23,9 @@ class EntityMemory(Memory):
|
||||
"""Saves an entity item into the SQLite storage."""
|
||||
data = f"{item.name}({item.type}): {item.description}"
|
||||
super().save(data, item.metadata)
|
||||
|
||||
def reset(self) -> None:
|
||||
try:
|
||||
self.storage.reset()
|
||||
except Exception as e:
|
||||
raise Exception(f"An error occurred while resetting the entity memory: {e}")
|
||||
|
||||
@@ -30,3 +30,6 @@ class LongTermMemory(Memory):
|
||||
|
||||
def search(self, task: str, latest_n: int = 3) -> Dict[str, Any]:
|
||||
return self.storage.load(task, latest_n) # type: ignore # BUG?: "Storage" has no attribute "load"
|
||||
|
||||
def reset(self) -> None:
|
||||
self.storage.reset()
|
||||
|
||||
@@ -18,8 +18,16 @@ class ShortTermMemory(Memory):
|
||||
)
|
||||
super().__init__(storage)
|
||||
|
||||
def save(self, item: ShortTermMemoryItem) -> None: # type: ignore # BUG?: Signature of "save" incompatible with supertype "Memory"
|
||||
def save(self, item: ShortTermMemoryItem) -> None:
|
||||
super().save(item.data, item.metadata, item.agent)
|
||||
|
||||
def search(self, query: str, score_threshold: float = 0.35):
|
||||
return self.storage.search(query=query, score_threshold=score_threshold) # type: ignore # BUG? The reference is to the parent class, but the parent class does not have this parameters
|
||||
|
||||
def reset(self) -> None:
|
||||
try:
|
||||
self.storage.reset()
|
||||
except Exception as e:
|
||||
raise Exception(
|
||||
f"An error occurred while resetting the short-term memory: {e}"
|
||||
)
|
||||
|
||||
@@ -9,3 +9,6 @@ class Storage:
|
||||
|
||||
def search(self, key: str) -> Dict[str, Any]: # type: ignore
|
||||
pass
|
||||
|
||||
def reset(self) -> None:
|
||||
pass
|
||||
|
||||
@@ -103,3 +103,20 @@ class LTMSQLiteStorage:
|
||||
color="red",
|
||||
)
|
||||
return None
|
||||
|
||||
def reset(
|
||||
self,
|
||||
) -> None:
|
||||
"""Resets the LTM table with error handling."""
|
||||
try:
|
||||
with sqlite3.connect(self.db_path) as conn:
|
||||
cursor = conn.cursor()
|
||||
cursor.execute("DELETE FROM long_term_memories")
|
||||
conn.commit()
|
||||
|
||||
except sqlite3.Error as e:
|
||||
self._printer.print(
|
||||
content=f"MEMORY ERROR: An error occurred while deleting all rows in LTM: {e}",
|
||||
color="red",
|
||||
)
|
||||
return None
|
||||
|
||||
@@ -2,6 +2,7 @@ import contextlib
|
||||
import io
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
from typing import Any, Dict, List, Optional
|
||||
|
||||
from embedchain import App
|
||||
@@ -71,13 +72,13 @@ class RAGStorage(Storage):
|
||||
|
||||
if embedder_config:
|
||||
config["embedder"] = embedder_config
|
||||
|
||||
self.type = type
|
||||
self.app = App.from_config(config=config)
|
||||
self.app.llm = FakeLLM()
|
||||
if allow_reset:
|
||||
self.app.reset()
|
||||
|
||||
def save(self, value: Any, metadata: Dict[str, Any]) -> None: # type: ignore # BUG?: Should be save(key, value, metadata) Signature of "save" incompatible with supertype "Storage"
|
||||
def save(self, value: Any, metadata: Dict[str, Any]) -> None:
|
||||
self._generate_embedding(value, metadata)
|
||||
|
||||
def search( # type: ignore # BUG?: Signature of "search" incompatible with supertype "Storage"
|
||||
@@ -102,3 +103,11 @@ class RAGStorage(Storage):
|
||||
def _generate_embedding(self, text: str, metadata: Dict[str, Any]) -> Any:
|
||||
with suppress_logging():
|
||||
self.app.add(text, data_type="text", metadata=metadata)
|
||||
|
||||
def reset(self) -> None:
|
||||
try:
|
||||
shutil.rmtree(f"{db_storage_path()}/{self.type}")
|
||||
except Exception as e:
|
||||
raise Exception(
|
||||
f"An error occurred while resetting the {self.type} memory: {e}"
|
||||
)
|
||||
|
||||
@@ -8,7 +8,6 @@ from copy import copy
|
||||
from hashlib import md5
|
||||
from typing import Any, Dict, List, Optional, Tuple, Type, Union
|
||||
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
from opentelemetry.trace import Span
|
||||
from pydantic import UUID4, BaseModel, Field, field_validator, model_validator
|
||||
@@ -214,8 +213,8 @@ class Task(BaseModel):
|
||||
tools: Optional[List[Any]],
|
||||
) -> TaskOutput:
|
||||
"""Run the core execution logic of the task."""
|
||||
self.agent = agent
|
||||
agent = agent or self.agent
|
||||
self.agent = agent
|
||||
if not agent:
|
||||
raise Exception(
|
||||
f"The task '{self.description}' has no agent assigned, therefore it can't be executed directly and should be executed in a Crew using a specific process that support that, like hierarchical."
|
||||
@@ -329,9 +328,14 @@ class Task(BaseModel):
|
||||
|
||||
def _create_converter(self, *args, **kwargs) -> Converter:
|
||||
"""Create a converter instance."""
|
||||
converter = self.agent.get_output_converter(*args, **kwargs)
|
||||
if self.converter_cls:
|
||||
if self.agent and not self.converter_cls:
|
||||
converter = self.agent.get_output_converter(*args, **kwargs)
|
||||
elif self.converter_cls:
|
||||
converter = self.converter_cls(*args, **kwargs)
|
||||
|
||||
if not converter:
|
||||
raise Exception("No output converter found or set.")
|
||||
|
||||
return converter
|
||||
|
||||
def _export_output(
|
||||
|
||||
@@ -38,10 +38,10 @@ class Converter(OutputConverter):
|
||||
return self._create_instructor().to_json()
|
||||
else:
|
||||
return json.dumps(self._create_chain().invoke({}).model_dump())
|
||||
except Exception:
|
||||
except Exception as e:
|
||||
if current_attempt < self.max_attempts:
|
||||
return self.to_json(current_attempt + 1)
|
||||
return ConverterError("Failed to convert text into JSON.")
|
||||
return ConverterError(f"Failed to convert text into JSON, error: {e}.")
|
||||
|
||||
def _create_instructor(self):
|
||||
"""Create an instructor."""
|
||||
|
||||
@@ -17,6 +17,16 @@ class CrewPydanticOutputParser(PydanticOutputParser):
|
||||
|
||||
def parse_result(self, result: List[Generation], *, partial: bool = False) -> Any:
|
||||
result[0].text = self._transform_in_valid_json(result[0].text)
|
||||
|
||||
# Treating edge case of function calling llm returning the name instead of tool_name
|
||||
json_object = json.loads(result[0].text)
|
||||
json_object["tool_name"] = (
|
||||
json_object["name"]
|
||||
if "tool_name" not in json_object
|
||||
else json_object["tool_name"]
|
||||
)
|
||||
result[0].text = json.dumps(json_object)
|
||||
|
||||
json_object = super().parse_result(result)
|
||||
try:
|
||||
return self.pydantic_object.parse_obj(json_object)
|
||||
|
||||
149
src/crewai/utilities/evaluators/crew_evaluator_handler.py
Normal file
149
src/crewai/utilities/evaluators/crew_evaluator_handler.py
Normal file
@@ -0,0 +1,149 @@
|
||||
from collections import defaultdict
|
||||
|
||||
from langchain_openai import ChatOpenAI
|
||||
from pydantic import BaseModel, Field
|
||||
from rich.console import Console
|
||||
from rich.table import Table
|
||||
|
||||
from crewai.agent import Agent
|
||||
from crewai.task import Task
|
||||
from crewai.tasks.task_output import TaskOutput
|
||||
|
||||
|
||||
class TaskEvaluationPydanticOutput(BaseModel):
|
||||
quality: float = Field(
|
||||
description="A score from 1 to 10 evaluating on completion, quality, and overall performance from the task_description and task_expected_output to the actual Task Output."
|
||||
)
|
||||
|
||||
|
||||
class CrewEvaluator:
|
||||
"""
|
||||
A class to evaluate the performance of the agents in the crew based on the tasks they have performed.
|
||||
|
||||
Attributes:
|
||||
crew (Crew): The crew of agents to evaluate.
|
||||
openai_model_name (str): The model to use for evaluating the performance of the agents (for now ONLY OpenAI accepted).
|
||||
tasks_scores (defaultdict): A dictionary to store the scores of the agents for each task.
|
||||
iteration (int): The current iteration of the evaluation.
|
||||
"""
|
||||
|
||||
tasks_scores: defaultdict = defaultdict(list)
|
||||
iteration: int = 0
|
||||
|
||||
def __init__(self, crew, openai_model_name: str):
|
||||
self.crew = crew
|
||||
self.openai_model_name = openai_model_name
|
||||
self._setup_for_evaluating()
|
||||
|
||||
def _setup_for_evaluating(self) -> None:
|
||||
"""Sets up the crew for evaluating."""
|
||||
for task in self.crew.tasks:
|
||||
task.callback = self.evaluate
|
||||
|
||||
def set_iteration(self, iteration: int) -> None:
|
||||
self.iteration = iteration
|
||||
|
||||
def _evaluator_agent(self):
|
||||
return Agent(
|
||||
role="Task Execution Evaluator",
|
||||
goal=(
|
||||
"Your goal is to evaluate the performance of the agents in the crew based on the tasks they have performed using score from 1 to 10 evaluating on completion, quality, and overall performance."
|
||||
),
|
||||
backstory="Evaluator agent for crew evaluation with precise capabilities to evaluate the performance of the agents in the crew based on the tasks they have performed",
|
||||
verbose=False,
|
||||
llm=ChatOpenAI(model=self.openai_model_name),
|
||||
)
|
||||
|
||||
def _evaluation_task(
|
||||
self, evaluator_agent: Agent, task_to_evaluate: Task, task_output: str
|
||||
) -> Task:
|
||||
return Task(
|
||||
description=(
|
||||
"Based on the task description and the expected output, compare and evaluate the performance of the agents in the crew based on the Task Output they have performed using score from 1 to 10 evaluating on completion, quality, and overall performance."
|
||||
f"task_description: {task_to_evaluate.description} "
|
||||
f"task_expected_output: {task_to_evaluate.expected_output} "
|
||||
f"agent: {task_to_evaluate.agent.role if task_to_evaluate.agent else None} "
|
||||
f"agent_goal: {task_to_evaluate.agent.goal if task_to_evaluate.agent else None} "
|
||||
f"Task Output: {task_output}"
|
||||
),
|
||||
expected_output="Evaluation Score from 1 to 10 based on the performance of the agents on the tasks",
|
||||
agent=evaluator_agent,
|
||||
output_pydantic=TaskEvaluationPydanticOutput,
|
||||
)
|
||||
|
||||
def print_crew_evaluation_result(self) -> None:
|
||||
"""
|
||||
Prints the evaluation result of the crew in a table.
|
||||
A Crew with 2 tasks using the command crewai test -n 2
|
||||
will output the following table:
|
||||
|
||||
Task Scores
|
||||
(1-10 Higher is better)
|
||||
┏━━━━━━━━━━━━┳━━━━━━━┳━━━━━━━┳━━━━━━━━━━━━┓
|
||||
┃ Tasks/Crew ┃ Run 1 ┃ Run 2 ┃ Avg. Total ┃
|
||||
┡━━━━━━━━━━━━╇━━━━━━━╇━━━━━━━╇━━━━━━━━━━━━┩
|
||||
│ Task 1 │ 10.0 │ 9.0 │ 9.5 │
|
||||
│ Task 2 │ 9.0 │ 9.0 │ 9.0 │
|
||||
│ Crew │ 9.5 │ 9.0 │ 9.2 │
|
||||
└────────────┴───────┴───────┴────────────┘
|
||||
"""
|
||||
task_averages = [
|
||||
sum(scores) / len(scores) for scores in zip(*self.tasks_scores.values())
|
||||
]
|
||||
crew_average = sum(task_averages) / len(task_averages)
|
||||
|
||||
# Create a table
|
||||
table = Table(title="Tasks Scores \n (1-10 Higher is better)")
|
||||
|
||||
# Add columns for the table
|
||||
table.add_column("Tasks/Crew")
|
||||
for run in range(1, len(self.tasks_scores) + 1):
|
||||
table.add_column(f"Run {run}")
|
||||
table.add_column("Avg. Total")
|
||||
|
||||
# Add rows for each task
|
||||
for task_index in range(len(task_averages)):
|
||||
task_scores = [
|
||||
self.tasks_scores[run][task_index]
|
||||
for run in range(1, len(self.tasks_scores) + 1)
|
||||
]
|
||||
avg_score = task_averages[task_index]
|
||||
table.add_row(
|
||||
f"Task {task_index + 1}", *map(str, task_scores), f"{avg_score:.1f}"
|
||||
)
|
||||
|
||||
# Add a row for the crew average
|
||||
crew_scores = [
|
||||
sum(self.tasks_scores[run]) / len(self.tasks_scores[run])
|
||||
for run in range(1, len(self.tasks_scores) + 1)
|
||||
]
|
||||
table.add_row("Crew", *map(str, crew_scores), f"{crew_average:.1f}")
|
||||
|
||||
# Display the table in the terminal
|
||||
console = Console()
|
||||
console.print(table)
|
||||
|
||||
def evaluate(self, task_output: TaskOutput):
|
||||
"""Evaluates the performance of the agents in the crew based on the tasks they have performed."""
|
||||
current_task = None
|
||||
for task in self.crew.tasks:
|
||||
if task.description == task_output.description:
|
||||
current_task = task
|
||||
break
|
||||
|
||||
if not current_task or not task_output:
|
||||
raise ValueError(
|
||||
"Task to evaluate and task output are required for evaluation"
|
||||
)
|
||||
|
||||
evaluator_agent = self._evaluator_agent()
|
||||
evaluation_task = self._evaluation_task(
|
||||
evaluator_agent, current_task, task_output.raw
|
||||
)
|
||||
|
||||
evaluation_result = evaluation_task.execute_sync()
|
||||
|
||||
if isinstance(evaluation_result.pydantic, TaskEvaluationPydanticOutput):
|
||||
self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
|
||||
else:
|
||||
raise ValueError("Evaluation result is not in the expected format")
|
||||
64
src/crewai/utilities/planning_handler.py
Normal file
64
src/crewai/utilities/planning_handler.py
Normal file
@@ -0,0 +1,64 @@
|
||||
from typing import List, Optional
|
||||
|
||||
from pydantic import BaseModel
|
||||
|
||||
from crewai.agent import Agent
|
||||
from crewai.task import Task
|
||||
|
||||
|
||||
class PlannerTaskPydanticOutput(BaseModel):
|
||||
list_of_plans_per_task: List[str]
|
||||
|
||||
|
||||
class CrewPlanner:
|
||||
def __init__(self, tasks: List[Task]):
|
||||
self.tasks = tasks
|
||||
|
||||
def _handle_crew_planning(self) -> Optional[BaseModel]:
|
||||
"""Handles the Crew planning by creating detailed step-by-step plans for each task."""
|
||||
planning_agent = self._create_planning_agent()
|
||||
tasks_summary = self._create_tasks_summary()
|
||||
|
||||
planner_task = self._create_planner_task(planning_agent, tasks_summary)
|
||||
|
||||
return planner_task.execute_sync().pydantic
|
||||
|
||||
def _create_planning_agent(self) -> Agent:
|
||||
"""Creates the planning agent for the crew planning."""
|
||||
return Agent(
|
||||
role="Task Execution Planner",
|
||||
goal=(
|
||||
"Your goal is to create an extremely detailed, step-by-step plan based on the tasks and tools "
|
||||
"available to each agent so that they can perform the tasks in an exemplary manner"
|
||||
),
|
||||
backstory="Planner agent for crew planning",
|
||||
)
|
||||
|
||||
def _create_planner_task(self, planning_agent: Agent, tasks_summary: str) -> Task:
|
||||
"""Creates the planner task using the given agent and tasks summary."""
|
||||
return Task(
|
||||
description=(
|
||||
f"Based on these tasks summary: {tasks_summary} \n Create the most descriptive plan based on the tasks "
|
||||
"descriptions, tools available, and agents' goals for them to execute their goals with perfection."
|
||||
),
|
||||
expected_output="Step by step plan on how the agents can execute their tasks using the available tools with mastery",
|
||||
agent=planning_agent,
|
||||
output_pydantic=PlannerTaskPydanticOutput,
|
||||
)
|
||||
|
||||
def _create_tasks_summary(self) -> str:
|
||||
"""Creates a summary of all tasks."""
|
||||
tasks_summary = []
|
||||
for idx, task in enumerate(self.tasks):
|
||||
tasks_summary.append(
|
||||
f"""
|
||||
Task Number {idx + 1} - {task.description}
|
||||
"task_description": {task.description}
|
||||
"task_expected_output": {task.expected_output}
|
||||
"agent": {task.agent.role if task.agent else "None"}
|
||||
"agent_goal": {task.agent.goal if task.agent else "None"}
|
||||
"task_tools": {task.tools}
|
||||
"agent_tools": {task.agent.tools if task.agent else "None"}
|
||||
"""
|
||||
)
|
||||
return " ".join(tasks_summary)
|
||||
2189
tests/cassettes/test_crew_does_not_interpolate_without_inputs.yaml
Normal file
2189
tests/cassettes/test_crew_does_not_interpolate_without_inputs.yaml
Normal file
File diff suppressed because it is too large
Load Diff
262
tests/cassettes/test_json_property_without_output_json.yaml
Normal file
262
tests/cassettes/test_json_property_without_output_json.yaml
Normal file
@@ -0,0 +1,262 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"messages": [{"content": "You are Scorer. You''re an expert scorer, specialized
|
||||
in scoring titles.\nYour personal goal is: Score the titleTo give my best complete
|
||||
final answer to the task use the exact following format:\n\nThought: I now can
|
||||
give a great answer\nFinal Answer: my best complete final answer to the task.\nYour
|
||||
final answer must be the great and the most complete as possible, it must be
|
||||
outcome described.\n\nI MUST use these formats, my job depends on it!\nCurrent
|
||||
Task: Give me an integer score between 1-5 for the following title: ''The impact
|
||||
of AI in the future of work''\n\nThis is the expect criteria for your final
|
||||
answer: The score of the title. \n you MUST return the actual complete content
|
||||
as the final answer, not a summary.\n\nBegin! This is VERY important to you,
|
||||
use the tools available and give your best Final Answer, your job depends on
|
||||
it!\n\nThought:\n", "role": "user"}], "model": "gpt-4o", "n": 1, "stop": ["\nObservation"],
|
||||
"stream": true, "temperature": 0.7}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '997'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.35.14
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.35.14
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.11.7
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: 'data: {"id":"chatcmpl-9ltULlJQMDDjI2f7PpkjJ7DsxWjEQ","object":"chat.completion.chunk","created":1721201741,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltULlJQMDDjI2f7PpkjJ7DsxWjEQ","object":"chat.completion.chunk","created":1721201741,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"Thought"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltULlJQMDDjI2f7PpkjJ7DsxWjEQ","object":"chat.completion.chunk","created":1721201741,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":":"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltULlJQMDDjI2f7PpkjJ7DsxWjEQ","object":"chat.completion.chunk","created":1721201741,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"
|
||||
I"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltULlJQMDDjI2f7PpkjJ7DsxWjEQ","object":"chat.completion.chunk","created":1721201741,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"
|
||||
now"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltULlJQMDDjI2f7PpkjJ7DsxWjEQ","object":"chat.completion.chunk","created":1721201741,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"
|
||||
can"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltULlJQMDDjI2f7PpkjJ7DsxWjEQ","object":"chat.completion.chunk","created":1721201741,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"
|
||||
give"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltULlJQMDDjI2f7PpkjJ7DsxWjEQ","object":"chat.completion.chunk","created":1721201741,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"
|
||||
a"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltULlJQMDDjI2f7PpkjJ7DsxWjEQ","object":"chat.completion.chunk","created":1721201741,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"
|
||||
great"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltULlJQMDDjI2f7PpkjJ7DsxWjEQ","object":"chat.completion.chunk","created":1721201741,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"
|
||||
answer"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltULlJQMDDjI2f7PpkjJ7DsxWjEQ","object":"chat.completion.chunk","created":1721201741,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"\n"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltULlJQMDDjI2f7PpkjJ7DsxWjEQ","object":"chat.completion.chunk","created":1721201741,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"Final"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltULlJQMDDjI2f7PpkjJ7DsxWjEQ","object":"chat.completion.chunk","created":1721201741,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"
|
||||
Answer"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltULlJQMDDjI2f7PpkjJ7DsxWjEQ","object":"chat.completion.chunk","created":1721201741,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":":"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltULlJQMDDjI2f7PpkjJ7DsxWjEQ","object":"chat.completion.chunk","created":1721201741,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"
|
||||
"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltULlJQMDDjI2f7PpkjJ7DsxWjEQ","object":"chat.completion.chunk","created":1721201741,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"4"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltULlJQMDDjI2f7PpkjJ7DsxWjEQ","object":"chat.completion.chunk","created":1721201741,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]}
|
||||
|
||||
|
||||
data: [DONE]
|
||||
|
||||
|
||||
'
|
||||
headers:
|
||||
CF-Cache-Status:
|
||||
- DYNAMIC
|
||||
CF-RAY:
|
||||
- 8a488e81aa7f0c7e-EWR
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Type:
|
||||
- text/event-stream; charset=utf-8
|
||||
Date:
|
||||
- Wed, 17 Jul 2024 07:35:41 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Set-Cookie:
|
||||
- __cf_bm=Ypr9N3lq.OD8hpimnkpN61rAsWyk216I8Tq7RA8.uwQ-1721201741-1.0.1.1-6Cj4aX9I96QHMmPwJBpO1iCFOJsvzq_agUIrl3XS.YhlPuGyA4K9sDONExvLn.cDe3W_p_1ET7Pt_hxjtHPAXQ;
|
||||
path=/; expires=Wed, 17-Jul-24 08:05:41 GMT; domain=.api.openai.com; HttpOnly;
|
||||
Secure; SameSite=None
|
||||
- _cfuvid=NLa1BaUsRvD7shojIzUH9YSRXQIEzaoJVcq2_gNwqm0-1721201741646-0.0.1.1-604800000;
|
||||
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '106'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
- max-age=15552000; includeSubDomains; preload
|
||||
x-ratelimit-limit-requests:
|
||||
- '10000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '30000000'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '9999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '29999771'
|
||||
x-ratelimit-reset-requests:
|
||||
- 6ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_e52461a1ab2702e360f6303fbcb4cc3c
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
- request:
|
||||
body: '{"messages": [{"role": "user", "content": "4"}, {"role": "system", "content":
|
||||
"I''m gonna convert this raw text into valid JSON."}], "model": "gpt-4o", "tool_choice":
|
||||
{"type": "function", "function": {"name": "ScoreOutput"}}, "tools": [{"type":
|
||||
"function", "function": {"name": "ScoreOutput", "description": "Correctly extracted
|
||||
`ScoreOutput` with all the required parameters with correct types", "parameters":
|
||||
{"properties": {"score": {"title": "Score", "type": "integer"}}, "required":
|
||||
["score"], "type": "object"}}}]}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '519'
|
||||
content-type:
|
||||
- application/json
|
||||
cookie:
|
||||
- __cf_bm=Ypr9N3lq.OD8hpimnkpN61rAsWyk216I8Tq7RA8.uwQ-1721201741-1.0.1.1-6Cj4aX9I96QHMmPwJBpO1iCFOJsvzq_agUIrl3XS.YhlPuGyA4K9sDONExvLn.cDe3W_p_1ET7Pt_hxjtHPAXQ;
|
||||
_cfuvid=NLa1BaUsRvD7shojIzUH9YSRXQIEzaoJVcq2_gNwqm0-1721201741646-0.0.1.1-604800000
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.35.14
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.35.14
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.11.7
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: !!binary |
|
||||
H4sIAAAAAAAAA2xS22rjMBB991eIeY6LHTubrR9LKYUttKWUvbUYRRnb6sqSVhqTJiH/vshxYzes
|
||||
H8QwZ86FGe8jxkCuoWAgGk6itSq+VPR8d8O/vXXZbiOzx13S5D9/PNwSv8nvYBYYZvWGgj5YF8K0
|
||||
ViFJo4+wcMgJg2q6nKfzJF3maQ+0Zo0q0GpLcW7ieTLP42QRp9lAbIwU6KFgvyPGGNv3b4io1/gO
|
||||
BUtmH50Wvec1QnEaYgycUaED3HvpiWuC2QgKowl1SK07pSYAGaNKwZUajY/fflKPe+JKlVe/nqvV
|
||||
98vNtslul3/Xj7vrzaZ6StOJ31F6a/tAVafFaT8T/NQvzswYA83bnvskjMP7jmxHZ3TGgLu6a1FT
|
||||
iA77F/Bh+AWK/ACfRg/R/+rXoTqc1qpMbZ1Z+bMtQSW19E3pkPs+LXgy9mgR5F7783WfLgLWmdZS
|
||||
SeYP6iD4dbgejP/LCC4GjAxxNeEsoiEe+K0nbMtK6hqddbI/JVS2FDkuVl8qnqUQHaJ/AAAA//8D
|
||||
ACb4o2zTAgAA
|
||||
headers:
|
||||
CF-Cache-Status:
|
||||
- DYNAMIC
|
||||
CF-RAY:
|
||||
- 8a488e858ca50c7e-EWR
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Encoding:
|
||||
- gzip
|
||||
Content-Type:
|
||||
- application/json
|
||||
Date:
|
||||
- Wed, 17 Jul 2024 07:35:42 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '241'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
- max-age=15552000; includeSubDomains; preload
|
||||
x-ratelimit-limit-requests:
|
||||
- '10000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '30000000'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '9999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '29999968'
|
||||
x-ratelimit-reset-requests:
|
||||
- 6ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_01c2f40fe9c73f883b7ed5b60c8067e5
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
9599
tests/cassettes/test_manager_agent_delegating_to_all_agents.yaml
Normal file
9599
tests/cassettes/test_manager_agent_delegating_to_all_agents.yaml
Normal file
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
159
tests/cassettes/test_replay_from_task_with_context.yaml
Normal file
159
tests/cassettes/test_replay_from_task_with_context.yaml
Normal file
@@ -0,0 +1,159 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"messages": [{"content": "You are test_agent. Test Description\nYour personal
|
||||
goal is: Test GoalTo give my best complete final answer to the task use the
|
||||
exact following format:\n\nThought: I now can give a great answer\nFinal Answer:
|
||||
my best complete final answer to the task.\nYour final answer must be the great
|
||||
and the most complete as possible, it must be outcome described.\n\nI MUST use
|
||||
these formats, my job depends on it!\nCurrent Task: Test Task\n\nThis is the
|
||||
expect criteria for your final answer: Say Hi \n you MUST return the actual
|
||||
complete content as the final answer, not a summary.\n\nThis is the context
|
||||
you''re working with:\ncontext raw output\n\nBegin! This is VERY important to
|
||||
you, use the tools available and give your best Final Answer, your job depends
|
||||
on it!\n\nThought:\n", "role": "user"}], "model": "gpt-4o", "n": 1, "stop":
|
||||
["\nObservation"], "stream": true, "temperature": 0.7}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '910'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.35.14
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.35.14
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.11.7
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: 'data: {"id":"chatcmpl-9ltUKsvuhS55Ng70kOmxKNQcksxm4","object":"chat.completion.chunk","created":1721201740,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltUKsvuhS55Ng70kOmxKNQcksxm4","object":"chat.completion.chunk","created":1721201740,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"Thought"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltUKsvuhS55Ng70kOmxKNQcksxm4","object":"chat.completion.chunk","created":1721201740,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":":"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltUKsvuhS55Ng70kOmxKNQcksxm4","object":"chat.completion.chunk","created":1721201740,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"
|
||||
I"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltUKsvuhS55Ng70kOmxKNQcksxm4","object":"chat.completion.chunk","created":1721201740,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"
|
||||
now"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltUKsvuhS55Ng70kOmxKNQcksxm4","object":"chat.completion.chunk","created":1721201740,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"
|
||||
can"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltUKsvuhS55Ng70kOmxKNQcksxm4","object":"chat.completion.chunk","created":1721201740,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"
|
||||
give"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltUKsvuhS55Ng70kOmxKNQcksxm4","object":"chat.completion.chunk","created":1721201740,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"
|
||||
a"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltUKsvuhS55Ng70kOmxKNQcksxm4","object":"chat.completion.chunk","created":1721201740,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"
|
||||
great"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltUKsvuhS55Ng70kOmxKNQcksxm4","object":"chat.completion.chunk","created":1721201740,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"
|
||||
answer"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltUKsvuhS55Ng70kOmxKNQcksxm4","object":"chat.completion.chunk","created":1721201740,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"\n"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltUKsvuhS55Ng70kOmxKNQcksxm4","object":"chat.completion.chunk","created":1721201740,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"Final"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltUKsvuhS55Ng70kOmxKNQcksxm4","object":"chat.completion.chunk","created":1721201740,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"
|
||||
Answer"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltUKsvuhS55Ng70kOmxKNQcksxm4","object":"chat.completion.chunk","created":1721201740,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":":"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltUKsvuhS55Ng70kOmxKNQcksxm4","object":"chat.completion.chunk","created":1721201740,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"
|
||||
Hi"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9ltUKsvuhS55Ng70kOmxKNQcksxm4","object":"chat.completion.chunk","created":1721201740,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]}
|
||||
|
||||
|
||||
data: [DONE]
|
||||
|
||||
|
||||
'
|
||||
headers:
|
||||
CF-Cache-Status:
|
||||
- DYNAMIC
|
||||
CF-RAY:
|
||||
- 8a488e7859304288-EWR
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Type:
|
||||
- text/event-stream; charset=utf-8
|
||||
Date:
|
||||
- Wed, 17 Jul 2024 07:35:40 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Set-Cookie:
|
||||
- __cf_bm=tBullt04HIYduv1QEpAn86_ghx9PQ4DMe9LwLp7.bu4-1721201740-1.0.1.1-sGn3OYi9ntPGIzwuC4RH0UTUQFVy.BUFIvy9v9IrjorYeAsecsuuuREs7b19i4dXygWnZZEkH6cxvnQeJ62g7g;
|
||||
path=/; expires=Wed, 17-Jul-24 08:05:40 GMT; domain=.api.openai.com; HttpOnly;
|
||||
Secure; SameSite=None
|
||||
- _cfuvid=uSSEccAecytxcWKGJoaQXLVBAwHLrZ.QckpcK..rN48-1721201740355-0.0.1.1-604800000;
|
||||
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '83'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
- max-age=15552000; includeSubDomains; preload
|
||||
x-ratelimit-limit-requests:
|
||||
- '10000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '30000000'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '9999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '29999793'
|
||||
x-ratelimit-reset-requests:
|
||||
- 6ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_89b4947eaf51788bd7a69d6cc0da6c08
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
163
tests/cassettes/test_replay_setup_context.yaml
Normal file
163
tests/cassettes/test_replay_setup_context.yaml
Normal file
@@ -0,0 +1,163 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"messages": [{"content": "You are test_agent. Test Description\nYour personal
|
||||
goal is: Test GoalTo give my best complete final answer to the task use the
|
||||
exact following format:\n\nThought: I now can give a great answer\nFinal Answer:
|
||||
my best complete final answer to the task.\nYour final answer must be the great
|
||||
and the most complete as possible, it must be outcome described.\n\nI MUST use
|
||||
these formats, my job depends on it!\nCurrent Task: Test Task\n\nThis is the
|
||||
expect criteria for your final answer: Say Hi to John \n you MUST return the
|
||||
actual complete content as the final answer, not a summary.\n\nThis is the context
|
||||
you''re working with:\ncontext raw output\n\nBegin! This is VERY important to
|
||||
you, use the tools available and give your best Final Answer, your job depends
|
||||
on it!\n\nThought:\n", "role": "user"}], "model": "gpt-4o", "logprobs": false,
|
||||
"n": 1, "stop": ["\nObservation"], "stream": true, "temperature": 0.7}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '937'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.36.0
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.36.0
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.11.7
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: 'data: {"id":"chatcmpl-9n6wQ7bzZKcXAmiNgs4nn5Of0EFiM","object":"chat.completion.chunk","created":1721491782,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wQ7bzZKcXAmiNgs4nn5Of0EFiM","object":"chat.completion.chunk","created":1721491782,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"Thought"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wQ7bzZKcXAmiNgs4nn5Of0EFiM","object":"chat.completion.chunk","created":1721491782,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":":"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wQ7bzZKcXAmiNgs4nn5Of0EFiM","object":"chat.completion.chunk","created":1721491782,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"
|
||||
I"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wQ7bzZKcXAmiNgs4nn5Of0EFiM","object":"chat.completion.chunk","created":1721491782,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"
|
||||
now"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wQ7bzZKcXAmiNgs4nn5Of0EFiM","object":"chat.completion.chunk","created":1721491782,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"
|
||||
can"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wQ7bzZKcXAmiNgs4nn5Of0EFiM","object":"chat.completion.chunk","created":1721491782,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"
|
||||
give"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wQ7bzZKcXAmiNgs4nn5Of0EFiM","object":"chat.completion.chunk","created":1721491782,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"
|
||||
a"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wQ7bzZKcXAmiNgs4nn5Of0EFiM","object":"chat.completion.chunk","created":1721491782,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"
|
||||
great"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wQ7bzZKcXAmiNgs4nn5Of0EFiM","object":"chat.completion.chunk","created":1721491782,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"
|
||||
answer"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wQ7bzZKcXAmiNgs4nn5Of0EFiM","object":"chat.completion.chunk","created":1721491782,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"\n"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wQ7bzZKcXAmiNgs4nn5Of0EFiM","object":"chat.completion.chunk","created":1721491782,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"Final"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wQ7bzZKcXAmiNgs4nn5Of0EFiM","object":"chat.completion.chunk","created":1721491782,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"
|
||||
Answer"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wQ7bzZKcXAmiNgs4nn5Of0EFiM","object":"chat.completion.chunk","created":1721491782,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":":"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wQ7bzZKcXAmiNgs4nn5Of0EFiM","object":"chat.completion.chunk","created":1721491782,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"
|
||||
Hi"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wQ7bzZKcXAmiNgs4nn5Of0EFiM","object":"chat.completion.chunk","created":1721491782,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{"content":"
|
||||
John"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wQ7bzZKcXAmiNgs4nn5Of0EFiM","object":"chat.completion.chunk","created":1721491782,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_400f27fa1f","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]}
|
||||
|
||||
|
||||
data: [DONE]
|
||||
|
||||
|
||||
'
|
||||
headers:
|
||||
CF-Cache-Status:
|
||||
- DYNAMIC
|
||||
CF-RAY:
|
||||
- 8a643794fe0341e9-EWR
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Type:
|
||||
- text/event-stream; charset=utf-8
|
||||
Date:
|
||||
- Sat, 20 Jul 2024 16:09:42 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Set-Cookie:
|
||||
- __cf_bm=7kfE3khl2E.6zM44yel5nToHzdtz0QeQ4wkLuGYyqSs-1721491782-1.0.1.1-XUb95eXTriHvSUSCH.TCyAmCGCbPK6L7p_tRTDBon8Fo6ns8TDbDoDGA.wVCFI4MTXSxkqrjD0GpYDj4GBTeSQ;
|
||||
path=/; expires=Sat, 20-Jul-24 16:39:42 GMT; domain=.api.openai.com; HttpOnly;
|
||||
Secure; SameSite=None
|
||||
- _cfuvid=iN41lAEk.DjpRMAtG.K0NEvIN0xB9eS0CUCU2iWmjv4-1721491782137-0.0.1.1-604800000;
|
||||
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '104'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
- max-age=15552000; includeSubDomains; preload
|
||||
x-ratelimit-limit-requests:
|
||||
- '10000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '30000000'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '9999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '29999791'
|
||||
x-ratelimit-reset-requests:
|
||||
- 6ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_4d90924dd28a0fb48c857f03515f0ca8
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
159
tests/cassettes/test_replay_with_context.yaml
Normal file
159
tests/cassettes/test_replay_with_context.yaml
Normal file
@@ -0,0 +1,159 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"messages": [{"content": "You are test_agent. Test Description\nYour personal
|
||||
goal is: Test GoalTo give my best complete final answer to the task use the
|
||||
exact following format:\n\nThought: I now can give a great answer\nFinal Answer:
|
||||
my best complete final answer to the task.\nYour final answer must be the great
|
||||
and the most complete as possible, it must be outcome described.\n\nI MUST use
|
||||
these formats, my job depends on it!\nCurrent Task: Test Task\n\nThis is the
|
||||
expect criteria for your final answer: Say Hi \n you MUST return the actual
|
||||
complete content as the final answer, not a summary.\n\nThis is the context
|
||||
you''re working with:\ncontext raw output\n\nBegin! This is VERY important to
|
||||
you, use the tools available and give your best Final Answer, your job depends
|
||||
on it!\n\nThought:\n", "role": "user"}], "model": "gpt-4o", "logprobs": false,
|
||||
"n": 1, "stop": ["\nObservation"], "stream": true, "temperature": 0.7}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '929'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.36.0
|
||||
x-stainless-arch:
|
||||
- arm64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- MacOS
|
||||
x-stainless-package-version:
|
||||
- 1.36.0
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.11.7
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/chat/completions
|
||||
response:
|
||||
body:
|
||||
string: 'data: {"id":"chatcmpl-9n6wPAClsh4tUGoLYKLh3VoX1vlAx","object":"chat.completion.chunk","created":1721491781,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"role":"assistant","content":""},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wPAClsh4tUGoLYKLh3VoX1vlAx","object":"chat.completion.chunk","created":1721491781,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"Thought"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wPAClsh4tUGoLYKLh3VoX1vlAx","object":"chat.completion.chunk","created":1721491781,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":":"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wPAClsh4tUGoLYKLh3VoX1vlAx","object":"chat.completion.chunk","created":1721491781,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"
|
||||
I"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wPAClsh4tUGoLYKLh3VoX1vlAx","object":"chat.completion.chunk","created":1721491781,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"
|
||||
now"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wPAClsh4tUGoLYKLh3VoX1vlAx","object":"chat.completion.chunk","created":1721491781,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"
|
||||
can"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wPAClsh4tUGoLYKLh3VoX1vlAx","object":"chat.completion.chunk","created":1721491781,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"
|
||||
give"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wPAClsh4tUGoLYKLh3VoX1vlAx","object":"chat.completion.chunk","created":1721491781,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"
|
||||
a"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wPAClsh4tUGoLYKLh3VoX1vlAx","object":"chat.completion.chunk","created":1721491781,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"
|
||||
great"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wPAClsh4tUGoLYKLh3VoX1vlAx","object":"chat.completion.chunk","created":1721491781,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"
|
||||
answer"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wPAClsh4tUGoLYKLh3VoX1vlAx","object":"chat.completion.chunk","created":1721491781,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"\n"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wPAClsh4tUGoLYKLh3VoX1vlAx","object":"chat.completion.chunk","created":1721491781,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"Final"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wPAClsh4tUGoLYKLh3VoX1vlAx","object":"chat.completion.chunk","created":1721491781,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"
|
||||
Answer"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wPAClsh4tUGoLYKLh3VoX1vlAx","object":"chat.completion.chunk","created":1721491781,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":":"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wPAClsh4tUGoLYKLh3VoX1vlAx","object":"chat.completion.chunk","created":1721491781,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{"content":"
|
||||
Hi"},"logprobs":null,"finish_reason":null}]}
|
||||
|
||||
|
||||
data: {"id":"chatcmpl-9n6wPAClsh4tUGoLYKLh3VoX1vlAx","object":"chat.completion.chunk","created":1721491781,"model":"gpt-4o-2024-05-13","system_fingerprint":"fp_c4e5b6fa31","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}]}
|
||||
|
||||
|
||||
data: [DONE]
|
||||
|
||||
|
||||
'
|
||||
headers:
|
||||
CF-Cache-Status:
|
||||
- DYNAMIC
|
||||
CF-RAY:
|
||||
- 8a643791a80e8c96-EWR
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Type:
|
||||
- text/event-stream; charset=utf-8
|
||||
Date:
|
||||
- Sat, 20 Jul 2024 16:09:41 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Set-Cookie:
|
||||
- __cf_bm=cam5sECdaTzbttLIOaiuvh9flDIAXp_FLPODnDEOn6k-1721491781-1.0.1.1-hyFl43P7HIWZsGueyWuDeO579sZ41as2mvrM.cQS1E8KSLG2ZZ0DxDGbVvHYRO0eflTUJohgZu6CGltvjQfMtQ;
|
||||
path=/; expires=Sat, 20-Jul-24 16:39:41 GMT; domain=.api.openai.com; HttpOnly;
|
||||
Secure; SameSite=None
|
||||
- _cfuvid=nmlgS.bqXAu0rZ.OlHPfXrIrdnVgrBSW3e0UuU3N5ng-1721491781661-0.0.1.1-604800000;
|
||||
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
|
||||
Transfer-Encoding:
|
||||
- chunked
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
openai-organization:
|
||||
- crewai-iuxna1
|
||||
openai-processing-ms:
|
||||
- '126'
|
||||
openai-version:
|
||||
- '2020-10-01'
|
||||
strict-transport-security:
|
||||
- max-age=15552000; includeSubDomains; preload
|
||||
x-ratelimit-limit-requests:
|
||||
- '10000'
|
||||
x-ratelimit-limit-tokens:
|
||||
- '30000000'
|
||||
x-ratelimit-remaining-requests:
|
||||
- '9999'
|
||||
x-ratelimit-remaining-tokens:
|
||||
- '29999794'
|
||||
x-ratelimit-reset-requests:
|
||||
- 6ms
|
||||
x-ratelimit-reset-tokens:
|
||||
- 0s
|
||||
x-request-id:
|
||||
- req_31484eeb0af939af4e0d9c47441ba2db
|
||||
status:
|
||||
code: 200
|
||||
message: OK
|
||||
version: 1
|
||||
@@ -3,7 +3,7 @@ from unittest import mock
|
||||
import pytest
|
||||
from click.testing import CliRunner
|
||||
|
||||
from crewai.cli.cli import train, version
|
||||
from crewai.cli.cli import reset_memories, test, train, version
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
@@ -41,6 +41,82 @@ def test_train_invalid_string_iterations(train_crew, runner):
|
||||
)
|
||||
|
||||
|
||||
@mock.patch("crewai.cli.reset_memories_command.ShortTermMemory")
|
||||
@mock.patch("crewai.cli.reset_memories_command.EntityMemory")
|
||||
@mock.patch("crewai.cli.reset_memories_command.LongTermMemory")
|
||||
@mock.patch("crewai.cli.reset_memories_command.TaskOutputStorageHandler")
|
||||
def test_reset_all_memories(
|
||||
MockTaskOutputStorageHandler,
|
||||
MockLongTermMemory,
|
||||
MockEntityMemory,
|
||||
MockShortTermMemory,
|
||||
runner,
|
||||
):
|
||||
result = runner.invoke(reset_memories, ["--all"])
|
||||
MockShortTermMemory().reset.assert_called_once()
|
||||
MockEntityMemory().reset.assert_called_once()
|
||||
MockLongTermMemory().reset.assert_called_once()
|
||||
MockTaskOutputStorageHandler().reset.assert_called_once()
|
||||
|
||||
assert result.output == "All memories have been reset.\n"
|
||||
|
||||
|
||||
@mock.patch("crewai.cli.reset_memories_command.ShortTermMemory")
|
||||
def test_reset_short_term_memories(MockShortTermMemory, runner):
|
||||
result = runner.invoke(reset_memories, ["-s"])
|
||||
MockShortTermMemory().reset.assert_called_once()
|
||||
assert result.output == "Short term memory has been reset.\n"
|
||||
|
||||
|
||||
@mock.patch("crewai.cli.reset_memories_command.EntityMemory")
|
||||
def test_reset_entity_memories(MockEntityMemory, runner):
|
||||
result = runner.invoke(reset_memories, ["-e"])
|
||||
MockEntityMemory().reset.assert_called_once()
|
||||
assert result.output == "Entity memory has been reset.\n"
|
||||
|
||||
|
||||
@mock.patch("crewai.cli.reset_memories_command.LongTermMemory")
|
||||
def test_reset_long_term_memories(MockLongTermMemory, runner):
|
||||
result = runner.invoke(reset_memories, ["-l"])
|
||||
MockLongTermMemory().reset.assert_called_once()
|
||||
assert result.output == "Long term memory has been reset.\n"
|
||||
|
||||
|
||||
@mock.patch("crewai.cli.reset_memories_command.TaskOutputStorageHandler")
|
||||
def test_reset_kickoff_outputs(MockTaskOutputStorageHandler, runner):
|
||||
result = runner.invoke(reset_memories, ["-k"])
|
||||
MockTaskOutputStorageHandler().reset.assert_called_once()
|
||||
assert result.output == "Latest Kickoff outputs stored has been reset.\n"
|
||||
|
||||
|
||||
@mock.patch("crewai.cli.reset_memories_command.ShortTermMemory")
|
||||
@mock.patch("crewai.cli.reset_memories_command.LongTermMemory")
|
||||
def test_reset_multiple_memory_flags(MockShortTermMemory, MockLongTermMemory, runner):
|
||||
result = runner.invoke(
|
||||
reset_memories,
|
||||
[
|
||||
"-s",
|
||||
"-l",
|
||||
],
|
||||
)
|
||||
MockShortTermMemory().reset.assert_called_once()
|
||||
MockLongTermMemory().reset.assert_called_once()
|
||||
assert (
|
||||
result.output
|
||||
== "Long term memory has been reset.\nShort term memory has been reset.\n"
|
||||
)
|
||||
|
||||
|
||||
def test_reset_no_memory_flags(runner):
|
||||
result = runner.invoke(
|
||||
reset_memories,
|
||||
)
|
||||
assert (
|
||||
result.output
|
||||
== "Please specify at least one memory type to reset using the appropriate flags.\n"
|
||||
)
|
||||
|
||||
|
||||
def test_version_command(runner):
|
||||
result = runner.invoke(version)
|
||||
|
||||
@@ -57,3 +133,33 @@ def test_version_command_with_tools(runner):
|
||||
"crewai tools version:" in result.output
|
||||
or "crewai tools not installed" in result.output
|
||||
)
|
||||
|
||||
|
||||
@mock.patch("crewai.cli.cli.test_crew")
|
||||
def test_test_default_iterations(test_crew, runner):
|
||||
result = runner.invoke(test)
|
||||
|
||||
test_crew.assert_called_once_with(3, "gpt-4o-mini")
|
||||
assert result.exit_code == 0
|
||||
assert "Testing the crew for 3 iterations with model gpt-4o-mini" in result.output
|
||||
|
||||
|
||||
@mock.patch("crewai.cli.cli.test_crew")
|
||||
def test_test_custom_iterations(test_crew, runner):
|
||||
result = runner.invoke(test, ["--n_iterations", "5", "--model", "gpt-4o"])
|
||||
|
||||
test_crew.assert_called_once_with(5, "gpt-4o")
|
||||
assert result.exit_code == 0
|
||||
assert "Testing the crew for 5 iterations with model gpt-4o" in result.output
|
||||
|
||||
|
||||
@mock.patch("crewai.cli.cli.test_crew")
|
||||
def test_test_invalid_string_iterations(test_crew, runner):
|
||||
result = runner.invoke(test, ["--n_iterations", "invalid"])
|
||||
|
||||
test_crew.assert_not_called()
|
||||
assert result.exit_code == 2
|
||||
assert (
|
||||
"Usage: test [OPTIONS]\nTry 'test --help' for help.\n\nError: Invalid value for '-n' / '--n_iterations': 'invalid' is not a valid integer.\n"
|
||||
in result.output
|
||||
)
|
||||
|
||||
97
tests/cli/test_crew_test.py
Normal file
97
tests/cli/test_crew_test.py
Normal file
@@ -0,0 +1,97 @@
|
||||
import subprocess
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai.cli import test_crew
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
|
||||
"n_iterations,model",
|
||||
[
|
||||
(1, "gpt-4o"),
|
||||
(5, "gpt-3.5-turbo"),
|
||||
(10, "gpt-4"),
|
||||
],
|
||||
)
|
||||
@mock.patch("crewai.cli.test_crew.subprocess.run")
|
||||
def test_crew_success(mock_subprocess_run, n_iterations, model):
|
||||
"""Test the crew function for successful execution."""
|
||||
mock_subprocess_run.return_value = subprocess.CompletedProcess(
|
||||
args=f"poetry run test {n_iterations} {model}", returncode=0
|
||||
)
|
||||
result = test_crew.test_crew(n_iterations, model)
|
||||
|
||||
mock_subprocess_run.assert_called_once_with(
|
||||
["poetry", "run", "test", str(n_iterations), model],
|
||||
capture_output=False,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
assert result is None
|
||||
|
||||
|
||||
@mock.patch("crewai.cli.test_crew.click")
|
||||
def test_test_crew_zero_iterations(click):
|
||||
test_crew.test_crew(0, "gpt-4o")
|
||||
click.echo.assert_called_once_with(
|
||||
"An unexpected error occurred: The number of iterations must be a positive integer.",
|
||||
err=True,
|
||||
)
|
||||
|
||||
|
||||
@mock.patch("crewai.cli.test_crew.click")
|
||||
def test_test_crew_negative_iterations(click):
|
||||
test_crew.test_crew(-2, "gpt-4o")
|
||||
click.echo.assert_called_once_with(
|
||||
"An unexpected error occurred: The number of iterations must be a positive integer.",
|
||||
err=True,
|
||||
)
|
||||
|
||||
|
||||
@mock.patch("crewai.cli.test_crew.click")
|
||||
@mock.patch("crewai.cli.test_crew.subprocess.run")
|
||||
def test_test_crew_called_process_error(mock_subprocess_run, click):
|
||||
n_iterations = 5
|
||||
mock_subprocess_run.side_effect = subprocess.CalledProcessError(
|
||||
returncode=1,
|
||||
cmd=["poetry", "run", "test", str(n_iterations), "gpt-4o"],
|
||||
output="Error",
|
||||
stderr="Some error occurred",
|
||||
)
|
||||
test_crew.test_crew(n_iterations, "gpt-4o")
|
||||
|
||||
mock_subprocess_run.assert_called_once_with(
|
||||
["poetry", "run", "test", "5", "gpt-4o"],
|
||||
capture_output=False,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
click.echo.assert_has_calls(
|
||||
[
|
||||
mock.call.echo(
|
||||
"An error occurred while testing the crew: Command '['poetry', 'run', 'test', '5', 'gpt-4o']' returned non-zero exit status 1.",
|
||||
err=True,
|
||||
),
|
||||
mock.call.echo("Error", err=True),
|
||||
]
|
||||
)
|
||||
|
||||
|
||||
@mock.patch("crewai.cli.test_crew.click")
|
||||
@mock.patch("crewai.cli.test_crew.subprocess.run")
|
||||
def test_test_crew_unexpected_exception(mock_subprocess_run, click):
|
||||
# Arrange
|
||||
n_iterations = 5
|
||||
mock_subprocess_run.side_effect = Exception("Unexpected error")
|
||||
test_crew.test_crew(n_iterations, "gpt-4o")
|
||||
|
||||
mock_subprocess_run.assert_called_once_with(
|
||||
["poetry", "run", "test", "5", "gpt-4o"],
|
||||
capture_output=False,
|
||||
text=True,
|
||||
check=True,
|
||||
)
|
||||
click.echo.assert_called_once_with(
|
||||
"An unexpected error occurred: Unexpected error", err=True
|
||||
)
|
||||
@@ -8,6 +8,7 @@ from unittest.mock import MagicMock, patch
|
||||
|
||||
import pydantic_core
|
||||
import pytest
|
||||
|
||||
from crewai.agent import Agent
|
||||
from crewai.agents.cache import CacheHandler
|
||||
from crewai.crew import Crew
|
||||
@@ -286,7 +287,7 @@ def test_hierarchical_process():
|
||||
crew = Crew(
|
||||
agents=[researcher, writer],
|
||||
process=Process.hierarchical,
|
||||
manager_llm=ChatOpenAI(temperature=0, model="gpt-4"),
|
||||
manager_llm=ChatOpenAI(temperature=0, model="gpt-4o"),
|
||||
tasks=[task],
|
||||
)
|
||||
|
||||
@@ -312,6 +313,82 @@ def test_manager_llm_requirement_for_hierarchical_process():
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_manager_agent_delegating_to_assigned_task_agent():
|
||||
"""
|
||||
Test that the manager agent delegates to the assigned task agent.
|
||||
"""
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
task = Task(
|
||||
description="Come up with a list of 5 interesting ideas to explore for an article, then write one amazing paragraph highlight for each idea that showcases how good an article about this topic could be. Return the list of ideas with their paragraph and your notes.",
|
||||
expected_output="5 bullet points with a paragraph for each idea.",
|
||||
agent=researcher,
|
||||
)
|
||||
|
||||
crew = Crew(
|
||||
agents=[researcher, writer],
|
||||
process=Process.hierarchical,
|
||||
manager_llm=ChatOpenAI(temperature=0, model="gpt-4o"),
|
||||
tasks=[task],
|
||||
)
|
||||
|
||||
crew.kickoff()
|
||||
|
||||
# Check if the manager agent has the correct tools
|
||||
assert crew.manager_agent is not None
|
||||
assert crew.manager_agent.tools is not None
|
||||
|
||||
assert len(crew.manager_agent.tools) == 2
|
||||
assert (
|
||||
"Delegate a specific task to one of the following coworkers: Researcher\n"
|
||||
in crew.manager_agent.tools[0].description
|
||||
)
|
||||
assert (
|
||||
"Ask a specific question to one of the following coworkers: Researcher\n"
|
||||
in crew.manager_agent.tools[1].description
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_manager_agent_delegating_to_all_agents():
|
||||
"""
|
||||
Test that the manager agent delegates to all agents when none are specified.
|
||||
"""
|
||||
from langchain_openai import ChatOpenAI
|
||||
|
||||
task = Task(
|
||||
description="Come up with a list of 5 interesting ideas to explore for an article, then write one amazing paragraph highlight for each idea that showcases how good an article about this topic could be. Return the list of ideas with their paragraph and your notes.",
|
||||
expected_output="5 bullet points with a paragraph for each idea.",
|
||||
)
|
||||
|
||||
crew = Crew(
|
||||
agents=[researcher, writer],
|
||||
process=Process.hierarchical,
|
||||
manager_llm=ChatOpenAI(temperature=0, model="gpt-4o"),
|
||||
tasks=[task],
|
||||
)
|
||||
|
||||
crew.kickoff()
|
||||
|
||||
assert crew.manager_agent is not None
|
||||
assert crew.manager_agent.tools is not None
|
||||
|
||||
assert len(crew.manager_agent.tools) == 2
|
||||
print(
|
||||
"crew.manager_agent.tools[0].description",
|
||||
crew.manager_agent.tools[0].description,
|
||||
)
|
||||
assert (
|
||||
"Delegate a specific task to one of the following coworkers: Researcher, Senior Writer\n"
|
||||
in crew.manager_agent.tools[0].description
|
||||
)
|
||||
assert (
|
||||
"Ask a specific question to one of the following coworkers: Researcher, Senior Writer\n"
|
||||
in crew.manager_agent.tools[1].description
|
||||
)
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_crew_with_delegating_agents():
|
||||
tasks = [
|
||||
@@ -1358,6 +1435,7 @@ def test_crew_inputs_interpolate_both_agents_and_tasks_diff():
|
||||
interpolate_task_inputs.assert_called()
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_crew_does_not_interpolate_without_inputs():
|
||||
from unittest.mock import patch
|
||||
|
||||
@@ -1840,13 +1918,13 @@ def test_replay_feature():
|
||||
)
|
||||
|
||||
crew.kickoff()
|
||||
crew.replay_from_task(str(write.id))
|
||||
crew.replay(str(write.id))
|
||||
# Ensure context was passed correctly
|
||||
assert mock_execute_task.call_count == 3
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_crew_replay_from_task_error():
|
||||
def test_crew_replay_error():
|
||||
task = Task(
|
||||
description="Come up with a list of 5 interesting ideas to explore for an article",
|
||||
expected_output="5 bullet points with a paragraph for each idea.",
|
||||
@@ -1859,7 +1937,7 @@ def test_crew_replay_from_task_error():
|
||||
)
|
||||
|
||||
with pytest.raises(TypeError) as e:
|
||||
crew.replay_from_task() # type: ignore purposefully throwing err
|
||||
crew.replay() # type: ignore purposefully throwing err
|
||||
assert "task_id is required" in str(e)
|
||||
|
||||
|
||||
@@ -1994,13 +2072,14 @@ def test_replay_task_with_context():
|
||||
with patch.object(Task, "execute_sync") as mock_replay_task:
|
||||
mock_replay_task.return_value = mock_task_output4
|
||||
|
||||
replayed_output = crew.replay_from_task(str(task4.id))
|
||||
replayed_output = crew.replay(str(task4.id))
|
||||
assert replayed_output.raw == "Presentation on AI advancements..."
|
||||
|
||||
db_handler.reset()
|
||||
|
||||
|
||||
def test_replay_from_task_with_context():
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_replay_with_context():
|
||||
agent = Agent(role="test_agent", backstory="Test Description", goal="Test Goal")
|
||||
task1 = Task(
|
||||
description="Context Task", expected_output="Say Task Output", agent=agent
|
||||
@@ -2052,7 +2131,7 @@ def test_replay_from_task_with_context():
|
||||
},
|
||||
],
|
||||
):
|
||||
crew.replay_from_task(str(task2.id))
|
||||
crew.replay(str(task2.id))
|
||||
|
||||
assert crew.tasks[1].context[0].output.raw == "context raw output"
|
||||
|
||||
@@ -2114,9 +2193,10 @@ def test_replay_with_invalid_task_id():
|
||||
ValueError,
|
||||
match="Task with id bf5b09c9-69bd-4eb8-be12-f9e5bae31c2d not found in the crew's tasks.",
|
||||
):
|
||||
crew.replay_from_task("bf5b09c9-69bd-4eb8-be12-f9e5bae31c2d")
|
||||
crew.replay("bf5b09c9-69bd-4eb8-be12-f9e5bae31c2d")
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
@patch.object(Crew, "_interpolate_inputs")
|
||||
def test_replay_interpolates_inputs_properly(mock_interpolate_inputs):
|
||||
agent = Agent(role="test_agent", backstory="Test Description", goal="Test Goal")
|
||||
@@ -2172,13 +2252,13 @@ def test_replay_interpolates_inputs_properly(mock_interpolate_inputs):
|
||||
},
|
||||
],
|
||||
):
|
||||
crew.replay_from_task(str(task2.id))
|
||||
crew.replay(str(task2.id))
|
||||
assert crew._inputs == {"name": "John"}
|
||||
assert mock_interpolate_inputs.call_count == 2
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_replay_from_task_setup_context():
|
||||
def test_replay_setup_context():
|
||||
agent = Agent(role="test_agent", backstory="Test Description", goal="Test Goal")
|
||||
task1 = Task(description="Context Task", expected_output="Say {name}", agent=agent)
|
||||
task2 = Task(
|
||||
@@ -2227,7 +2307,7 @@ def test_replay_from_task_setup_context():
|
||||
},
|
||||
],
|
||||
):
|
||||
crew.replay_from_task(str(task2.id))
|
||||
crew.replay(str(task2.id))
|
||||
|
||||
# Check if the first task's output was set correctly
|
||||
assert crew.tasks[0].output is not None
|
||||
@@ -2420,3 +2500,34 @@ def test_conditional_should_execute():
|
||||
assert condition_mock.call_count == 1
|
||||
assert condition_mock() is True
|
||||
assert mock_execute_sync.call_count == 2
|
||||
|
||||
|
||||
@mock.patch("crewai.crew.CrewEvaluator")
|
||||
@mock.patch("crewai.crew.Crew.kickoff")
|
||||
def test_crew_testing_function(mock_kickoff, crew_evaluator):
|
||||
task = Task(
|
||||
description="Come up with a list of 5 interesting ideas to explore for an article, then write one amazing paragraph highlight for each idea that showcases how good an article about this topic could be. Return the list of ideas with their paragraph and your notes.",
|
||||
expected_output="5 bullet points with a paragraph for each idea.",
|
||||
agent=researcher,
|
||||
)
|
||||
|
||||
crew = Crew(
|
||||
agents=[researcher],
|
||||
tasks=[task],
|
||||
)
|
||||
n_iterations = 2
|
||||
crew.test(n_iterations, openai_model_name="gpt-4o-mini", inputs={"topic": "AI"})
|
||||
|
||||
assert len(mock_kickoff.mock_calls) == n_iterations
|
||||
mock_kickoff.assert_has_calls(
|
||||
[mock.call(inputs={"topic": "AI"}), mock.call(inputs={"topic": "AI"})]
|
||||
)
|
||||
|
||||
crew_evaluator.assert_has_calls(
|
||||
[
|
||||
mock.call(crew, "gpt-4o-mini"),
|
||||
mock.call().set_iteration(1),
|
||||
mock.call().set_iteration(2),
|
||||
mock.call().print_crew_evaluation_result(),
|
||||
]
|
||||
)
|
||||
|
||||
@@ -5,12 +5,13 @@ import json
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
from pydantic import BaseModel
|
||||
from pydantic_core import ValidationError
|
||||
|
||||
from crewai import Agent, Crew, Process, Task
|
||||
from crewai.tasks.conditional_task import ConditionalTask
|
||||
from crewai.tasks.task_output import TaskOutput
|
||||
from crewai.utilities.converter import Converter
|
||||
from pydantic import BaseModel
|
||||
from pydantic_core import ValidationError
|
||||
|
||||
|
||||
def test_task_tool_reflect_agent_tools():
|
||||
@@ -318,6 +319,7 @@ def test_output_json_hierarchical():
|
||||
assert result.to_dict() == {"score": 4}
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
def test_json_property_without_output_json():
|
||||
class ScoreOutput(BaseModel):
|
||||
score: int
|
||||
@@ -398,8 +400,8 @@ def test_output_json_dict_hierarchical():
|
||||
manager_llm=ChatOpenAI(model="gpt-4o"),
|
||||
)
|
||||
result = crew.kickoff()
|
||||
assert {"score": 4} == result.json_dict
|
||||
assert result.to_dict() == {"score": 4}
|
||||
assert {"score": 5} == result.json_dict
|
||||
assert result.to_dict() == {"score": 5}
|
||||
|
||||
|
||||
@pytest.mark.vcr(filter_headers=["authorization"])
|
||||
|
||||
113
tests/utilities/evaluators/test_crew_evaluator_handler.py
Normal file
113
tests/utilities/evaluators/test_crew_evaluator_handler.py
Normal file
@@ -0,0 +1,113 @@
|
||||
from unittest import mock
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai.agent import Agent
|
||||
from crewai.crew import Crew
|
||||
from crewai.task import Task
|
||||
from crewai.tasks.task_output import TaskOutput
|
||||
from crewai.utilities.evaluators.crew_evaluator_handler import (
|
||||
CrewEvaluator,
|
||||
TaskEvaluationPydanticOutput,
|
||||
)
|
||||
|
||||
|
||||
class TestCrewEvaluator:
|
||||
@pytest.fixture
|
||||
def crew_planner(self):
|
||||
agent = Agent(role="Agent 1", goal="Goal 1", backstory="Backstory 1")
|
||||
task = Task(
|
||||
description="Task 1",
|
||||
expected_output="Output 1",
|
||||
agent=agent,
|
||||
)
|
||||
crew = Crew(agents=[agent], tasks=[task])
|
||||
|
||||
return CrewEvaluator(crew, openai_model_name="gpt-4o-mini")
|
||||
|
||||
def test_setup_for_evaluating(self, crew_planner):
|
||||
crew_planner._setup_for_evaluating()
|
||||
assert crew_planner.crew.tasks[0].callback == crew_planner.evaluate
|
||||
|
||||
def test_set_iteration(self, crew_planner):
|
||||
crew_planner.set_iteration(1)
|
||||
assert crew_planner.iteration == 1
|
||||
|
||||
def test_evaluator_agent(self, crew_planner):
|
||||
agent = crew_planner._evaluator_agent()
|
||||
assert agent.role == "Task Execution Evaluator"
|
||||
assert (
|
||||
agent.goal
|
||||
== "Your goal is to evaluate the performance of the agents in the crew based on the tasks they have performed using score from 1 to 10 evaluating on completion, quality, and overall performance."
|
||||
)
|
||||
assert (
|
||||
agent.backstory
|
||||
== "Evaluator agent for crew evaluation with precise capabilities to evaluate the performance of the agents in the crew based on the tasks they have performed"
|
||||
)
|
||||
assert agent.verbose is False
|
||||
assert agent.llm.model_name == "gpt-4o-mini"
|
||||
|
||||
def test_evaluation_task(self, crew_planner):
|
||||
evaluator_agent = Agent(
|
||||
role="Evaluator Agent",
|
||||
goal="Evaluate the performance of the agents in the crew",
|
||||
backstory="Master in Evaluation",
|
||||
)
|
||||
task_to_evaluate = Task(
|
||||
description="Task 1",
|
||||
expected_output="Output 1",
|
||||
agent=Agent(role="Agent 1", goal="Goal 1", backstory="Backstory 1"),
|
||||
)
|
||||
task_output = "Task Output 1"
|
||||
task = crew_planner._evaluation_task(
|
||||
evaluator_agent, task_to_evaluate, task_output
|
||||
)
|
||||
|
||||
assert task.description.startswith(
|
||||
"Based on the task description and the expected output, compare and evaluate the performance of the agents in the crew based on the Task Output they have performed using score from 1 to 10 evaluating on completion, quality, and overall performance."
|
||||
)
|
||||
|
||||
assert task.agent == evaluator_agent
|
||||
assert (
|
||||
task.description
|
||||
== "Based on the task description and the expected output, compare and evaluate "
|
||||
"the performance of the agents in the crew based on the Task Output they have "
|
||||
"performed using score from 1 to 10 evaluating on completion, quality, and overall "
|
||||
"performance.task_description: Task 1 task_expected_output: Output 1 "
|
||||
"agent: Agent 1 agent_goal: Goal 1 Task Output: Task Output 1"
|
||||
)
|
||||
|
||||
@mock.patch("crewai.utilities.evaluators.crew_evaluator_handler.Console")
|
||||
@mock.patch("crewai.utilities.evaluators.crew_evaluator_handler.Table")
|
||||
def test_print_crew_evaluation_result(self, table, console, crew_planner):
|
||||
crew_planner.tasks_scores = {
|
||||
1: [10, 9, 8],
|
||||
2: [9, 8, 7],
|
||||
}
|
||||
|
||||
crew_planner.print_crew_evaluation_result()
|
||||
|
||||
table.assert_has_calls(
|
||||
[
|
||||
mock.call(title="Tasks Scores \n (1-10 Higher is better)"),
|
||||
mock.call().add_column("Tasks/Crew"),
|
||||
mock.call().add_column("Run 1"),
|
||||
mock.call().add_column("Run 2"),
|
||||
mock.call().add_column("Avg. Total"),
|
||||
mock.call().add_row("Task 1", "10", "9", "9.5"),
|
||||
mock.call().add_row("Task 2", "9", "8", "8.5"),
|
||||
mock.call().add_row("Task 3", "8", "7", "7.5"),
|
||||
mock.call().add_row("Crew", "9.0", "8.0", "8.5"),
|
||||
]
|
||||
)
|
||||
console.assert_has_calls([mock.call(), mock.call().print(table())])
|
||||
|
||||
def test_evaluate(self, crew_planner):
|
||||
task_output = TaskOutput(
|
||||
description="Task 1", agent=str(crew_planner.crew.agents[0])
|
||||
)
|
||||
|
||||
with mock.patch.object(Task, "execute_sync") as execute:
|
||||
execute().pydantic = TaskEvaluationPydanticOutput(quality=9.5)
|
||||
crew_planner.evaluate(task_output)
|
||||
assert crew_planner.tasks_scores[0] == [9.5]
|
||||
74
tests/utilities/test_planning_handler.py
Normal file
74
tests/utilities/test_planning_handler.py
Normal file
@@ -0,0 +1,74 @@
|
||||
from unittest.mock import patch
|
||||
from crewai.tasks.task_output import TaskOutput
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai.agent import Agent
|
||||
from crewai.task import Task
|
||||
from crewai.utilities.planning_handler import CrewPlanner, PlannerTaskPydanticOutput
|
||||
|
||||
|
||||
class TestCrewPlanner:
|
||||
@pytest.fixture
|
||||
def crew_planner(self):
|
||||
tasks = [
|
||||
Task(
|
||||
description="Task 1",
|
||||
expected_output="Output 1",
|
||||
agent=Agent(role="Agent 1", goal="Goal 1", backstory="Backstory 1"),
|
||||
),
|
||||
Task(
|
||||
description="Task 2",
|
||||
expected_output="Output 2",
|
||||
agent=Agent(role="Agent 2", goal="Goal 2", backstory="Backstory 2"),
|
||||
),
|
||||
Task(
|
||||
description="Task 3",
|
||||
expected_output="Output 3",
|
||||
agent=Agent(role="Agent 3", goal="Goal 3", backstory="Backstory 3"),
|
||||
),
|
||||
]
|
||||
return CrewPlanner(tasks)
|
||||
|
||||
def test_handle_crew_planning(self, crew_planner):
|
||||
with patch.object(Task, "execute_sync") as execute:
|
||||
execute.return_value = TaskOutput(
|
||||
description="Description",
|
||||
agent="agent",
|
||||
pydantic=PlannerTaskPydanticOutput(
|
||||
list_of_plans_per_task=["Plan 1", "Plan 2", "Plan 3"]
|
||||
),
|
||||
)
|
||||
result = crew_planner._handle_crew_planning()
|
||||
|
||||
assert isinstance(result, PlannerTaskPydanticOutput)
|
||||
assert len(result.list_of_plans_per_task) == len(crew_planner.tasks)
|
||||
execute.assert_called_once()
|
||||
|
||||
def test_create_planning_agent(self, crew_planner):
|
||||
agent = crew_planner._create_planning_agent()
|
||||
assert isinstance(agent, Agent)
|
||||
assert agent.role == "Task Execution Planner"
|
||||
|
||||
def test_create_planner_task(self, crew_planner):
|
||||
planning_agent = Agent(
|
||||
role="Planning Agent",
|
||||
goal="Plan Step by Step Plan",
|
||||
backstory="Master in Planning",
|
||||
)
|
||||
tasks_summary = "Summary of tasks"
|
||||
task = crew_planner._create_planner_task(planning_agent, tasks_summary)
|
||||
|
||||
assert isinstance(task, Task)
|
||||
assert task.description.startswith("Based on these tasks summary")
|
||||
assert task.agent == planning_agent
|
||||
assert (
|
||||
task.expected_output
|
||||
== "Step by step plan on how the agents can execute their tasks using the available tools with mastery"
|
||||
)
|
||||
|
||||
def test_create_tasks_summary(self, crew_planner):
|
||||
tasks_summary = crew_planner._create_tasks_summary()
|
||||
assert isinstance(tasks_summary, str)
|
||||
assert tasks_summary.startswith("\n Task Number 1 - Task 1")
|
||||
assert tasks_summary.endswith('"agent_tools": []\n ')
|
||||
Reference in New Issue
Block a user