diff --git a/.github/workflows/mkdocs.yml b/.github/workflows/mkdocs.yml
index 11acce062..a15ae0363 100644
--- a/.github/workflows/mkdocs.yml
+++ b/.github/workflows/mkdocs.yml
@@ -1,10 +1,8 @@
 name: Deploy MkDocs
 
 on:
-  workflow_dispatch:
-  push:
-    branches:
-      - main
+  release:
+    types: [published]
 
 permissions:
   contents: write
diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml
index 624bf34f4..546b983a7 100644
--- a/.github/workflows/stale.yml
+++ b/.github/workflows/stale.yml
@@ -24,3 +24,4 @@ jobs:
           stale-pr-message: 'This PR is stale because it has been open for 45 days with no activity.'
           days-before-pr-stale: 45
           days-before-pr-close: -1
+          operations-per-run: 1200
diff --git a/docs/core-concepts/Cli.md b/docs/core-concepts/Cli.md
new file mode 100644
index 000000000..15546dcd7
--- /dev/null
+++ b/docs/core-concepts/Cli.md
@@ -0,0 +1,142 @@
+# CrewAI CLI Documentation
+
+The CrewAI CLI provides a set of commands to interact with CrewAI, allowing you to create, train, run, and manage crews and pipelines.
+
+## Installation
+
+To use the CrewAI CLI, make sure you have CrewAI and Poetry installed:
+
+```
+pip install crewai poetry
+```
+
+## Basic Usage
+
+The basic structure of a CrewAI CLI command is:
+
+```
+crewai [COMMAND] [OPTIONS] [ARGUMENTS]
+```
+
+## Available Commands
+
+### 1. create
+
+Create a new crew or pipeline.
+
+```
+crewai create [OPTIONS] TYPE NAME
+```
+
+- `TYPE`: Choose between "crew" or "pipeline"
+- `NAME`: Name of the crew or pipeline
+- `--router`: (Optional) Create a pipeline with router functionality
+
+Example:
+```
+crewai create crew my_new_crew
+crewai create pipeline my_new_pipeline --router
+```
+
+### 2. version
+
+Show the installed version of CrewAI.
+
+```
+crewai version [OPTIONS]
+```
+
+- `--tools`: (Optional) Show the installed version of CrewAI tools
+
+Example:
+```
+crewai version
+crewai version --tools
+```
+
+### 3. train
+
+Train the crew for a specified number of iterations.
+
+```
+crewai train [OPTIONS]
+```
+
+- `-n, --n_iterations INTEGER`: Number of iterations to train the crew (default: 5)
+- `-f, --filename TEXT`: Path to a custom file for training (default: "trained_agents_data.pkl")
+
+Example:
+```
+crewai train -n 10 -f my_training_data.pkl
+```
+
+### 4. replay
+
+Replay the crew execution from a specific task.
+
+```
+crewai replay [OPTIONS]
+```
+
+- `-t, --task_id TEXT`: Replay the crew from this task ID, including all subsequent tasks
+
+Example:
+```
+crewai replay -t task_123456
+```
+
+### 5. log_tasks_outputs
+
+Retrieve your latest `crew.kickoff()` task outputs.
+
+```
+crewai log_tasks_outputs
+```
+
+### 6. reset_memories
+
+Reset the crew memories (long, short, entity, latest_crew_kickoff_outputs).
+
+```
+crewai reset_memories [OPTIONS]
+```
+
+- `-l, --long`: Reset LONG TERM memory
+- `-s, --short`: Reset SHORT TERM memory
+- `-e, --entities`: Reset ENTITIES memory
+- `-k, --kickoff-outputs`: Reset LATEST KICKOFF TASK OUTPUTS
+- `-a, --all`: Reset ALL memories
+
+Example:
+```
+crewai reset_memories --long --short
+crewai reset_memories --all
+```
+
+### 7. test
+
+Test the crew and evaluate the results.
+
+```
+crewai test [OPTIONS]
+```
+
+- `-n, --n_iterations INTEGER`: Number of iterations to test the crew (default: 3)
+- `-m, --model TEXT`: LLM Model to run the tests on the Crew (default: "gpt-4o-mini")
+
+Example:
+```
+crewai test -n 5 -m gpt-3.5-turbo
+```
+
+### 8. run
+
+Run the crew.
+
+```
+crewai run
+```
+
+## Note
+
+Make sure to run these commands from the directory where your CrewAI project is set up. Some commands may require additional configuration or setup within your project structure.
diff --git a/docs/getting-started/Create-a-New-CrewAI-Pipeline-Template-Method.md b/docs/getting-started/Create-a-New-CrewAI-Pipeline-Template-Method.md
index f3859779a..3f9fa46a1 100644
--- a/docs/getting-started/Create-a-New-CrewAI-Pipeline-Template-Method.md
+++ b/docs/getting-started/Create-a-New-CrewAI-Pipeline-Template-Method.md
@@ -109,8 +109,7 @@ To install the dependencies for your project, use Poetry:
 
 ```shell
 $ cd 
-$ poetry lock
-$ poetry install
+$ crewai install
 ```
 
 ## Running Your Pipeline Project
@@ -121,12 +120,6 @@ To run your pipeline project, use the following command:
 $ crewai run
 ```
 
-or
-
-```shell
-$ poetry run 
-```
-
 This will initialize your pipeline and begin task execution as defined in your `main.py` file.
 
 ## Deploying Your Pipeline Project
diff --git a/docs/getting-started/Start-a-New-CrewAI-Project-Template-Method.md b/docs/getting-started/Start-a-New-CrewAI-Project-Template-Method.md
index d3792413b..d873d5ee7 100644
--- a/docs/getting-started/Start-a-New-CrewAI-Project-Template-Method.md
+++ b/docs/getting-started/Start-a-New-CrewAI-Project-Template-Method.md
@@ -191,8 +191,7 @@ To install the dependencies for your project, you can use Poetry. First, navigat
 
 ```shell
 $ cd my_project
-$ poetry lock
-$ poetry install
+$ crewai install
 ```
 
 This will install the dependencies specified in the `pyproject.toml` file.
@@ -233,11 +232,6 @@ To run your project, use the following command:
 ```shell
 $ crewai run
 ```
-or
-```shell
-$ poetry run my_project
-```
-
 This will initialize your crew of AI agents and begin task execution as defined in your configuration in the `main.py` file.
 
 ### Replay Tasks from Latest Crew Kickoff
diff --git a/docs/how-to/LLM-Connections.md b/docs/how-to/LLM-Connections.md
index 4acdbb3e3..1f0eafd5e 100644
--- a/docs/how-to/LLM-Connections.md
+++ b/docs/how-to/LLM-Connections.md
@@ -88,7 +88,7 @@ There are a couple of different ways you can use HuggingFace to host your LLM.
 ### Your own HuggingFace endpoint
 
 ```python
-from langchain_huggingface import HuggingFaceEndpoint,
+from langchain_huggingface import HuggingFaceEndpoint
 
 llm = HuggingFaceEndpoint(
     repo_id="microsoft/Phi-3-mini-4k-instruct",
@@ -194,4 +194,4 @@ azure_agent = Agent(
 )
 ```
 ## Conclusion
-Integrating CrewAI with different LLMs expands the framework's versatility, allowing for customized, efficient AI solutions across various domains and platforms.
\ No newline at end of file
+Integrating CrewAI with different LLMs expands the framework's versatility, allowing for customized, efficient AI solutions across various domains and platforms.
diff --git a/docs/tools/SpiderTool.md b/docs/tools/SpiderTool.md
new file mode 100644
index 000000000..603f224fc
--- /dev/null
+++ b/docs/tools/SpiderTool.md
@@ -0,0 +1,81 @@
+# SpiderTool
+
+## Description
+
+[Spider](https://spider.cloud/?ref=crewai) is the [fastest](https://github.com/spider-rs/spider/blob/main/benches/BENCHMARKS.md#benchmark-results) open source scraper and crawler that returns LLM-ready data. It converts any website into pure HTML, markdown, metadata or text while enabling you to crawl with custom actions using AI.
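+
+For reference, here is a minimal sketch of calling the Spider API directly through the `spider-client` SDK that this tool wraps (the `Spider` client and `scrape_url` method shown here are assumptions based on the SDK's documentation; verify against the current spider-client release):
+
+```python
+from spider import Spider
+
+# Assumes SPIDER_API_KEY is set in the environment
+app = Spider()
+
+# Returns LLM-ready markdown for a single page
+data = app.scrape_url("https://spider.cloud", params={"return_format": "markdown"})
+```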
+
+## Installation
+
+To use the Spider API you need to install the [Spider SDK](https://pypi.org/project/spider-client/) along with the `crewai[tools]` package:
+
+```shell
+pip install spider-client 'crewai[tools]'
+```
+
+## Example
+
+This example shows how to use the Spider tool to enable your agent to scrape and crawl websites. The data returned from the Spider API is already LLM-ready, so no additional cleaning is needed.
+
+```python
+from crewai import Agent, Crew, Task
+from crewai_tools import SpiderTool
+
+def main():
+    spider_tool = SpiderTool()
+
+    searcher = Agent(
+        role="Web Research Expert",
+        goal="Find related information from specific URLs",
+        backstory="An expert web researcher that uses the web extremely well",
+        tools=[spider_tool],
+        verbose=True,
+    )
+
+    return_metadata = Task(
+        description="Scrape https://spider.cloud with a limit of 1 and enable metadata",
+        expected_output="Metadata and a 10-word summary of spider.cloud",
+        agent=searcher,
+    )
+
+    crew = Crew(
+        agents=[searcher],
+        tasks=[return_metadata],
+        verbose=2,
+    )
+
+    crew.kickoff()
+
+if __name__ == "__main__":
+    main()
+```
+
+## Arguments
+
+- `api_key` (string, optional): Specifies the Spider API key. If not set, the tool looks for `SPIDER_API_KEY` in the environment variables.
+- `params` (object, optional): Optional parameters for the request. Defaults to `{"return_format": "markdown"}` to return the website's content in a format that fits LLMs better.
+  - `request` (string): The request type to perform. Possible values are `http`, `chrome`, and `smart`. Use `smart` to perform an HTTP request by default and fall back to JavaScript rendering when the HTML requires it.
+  - `limit` (int): The maximum number of pages allowed to crawl per website. Remove the value or set it to `0` to crawl all pages.
+  - `depth` (int): The maximum crawl depth. If `0`, no limit is applied.
+  - `cache` (bool): Use HTTP caching for the crawl to speed up repeated runs. Default is `true`.
+  - `budget` (object): Maps path patterns to page counts to cap how much of the site is crawled, e.g. `{"*": 1}` to crawl only the root page.
+  - `locale` (string): The locale to use for the request, e.g. `en-US`.
+  - `cookies` (string): HTTP cookies to send with the request.
+  - `stealth` (bool): Use stealth mode for headless Chrome requests to help prevent being blocked. The default is `true` on Chrome.
+  - `headers` (object): HTTP headers to forward with all requests, given as a map of key-value pairs.
+  - `metadata` (bool): Store metadata about the pages and content found. This can help improve AI interoperability. Defaults to `false` unless the website is already stored with the configuration enabled.
+  - `viewport` (object): Configure the viewport for Chrome. Defaults to `800x600`.
+  - `encoding` (string): The text encoding to use, such as `UTF-8` or `SHIFT_JIS`.
+  - `subdomains` (bool): Allow subdomains to be included. Default is `false`.
+  - `user_agent` (string): A custom HTTP user agent for the request. By default this is set to a random agent.
+  - `store_data` (bool): Whether storage should be used. If set, this takes precedence over `storageless`. Defaults to `false`.
+  - `gpt_config` (object): Use AI to generate actions to perform during the crawl. You can pass an array as the `"prompt"` to chain steps.
+  - `fingerprint` (bool): Use an advanced fingerprint for Chrome.
+  - `storageless` (bool): Prevent storing any type of data for the request, including storage and AI vector embeddings. Defaults to `false` unless the website is already stored.
+  - `readability` (bool): Use [readability](https://github.com/mozilla/readability) to pre-process the content for reading. This may drastically improve the content for LLM usage.
+  - `return_format` (string): The format to return the data in. Possible values are `markdown`, `raw`, `text`, and `html2text`. Use `raw` to return the default format of the page, e.g. HTML.
+  - `proxy_enabled` (bool): Enable high-performance premium proxies for the request to prevent being blocked at the network level.
+  - `query_selector` (string): The CSS query selector to use when extracting content from the markup.
+  - `full_resources` (bool): Crawl and download all the resources for a website.
+  - `request_timeout` (int): The timeout to use for the request, in seconds. Accepted values range from `5` to `60`. The default is `30` seconds.
+  - `run_in_background` (bool): Run the request in the background. Useful if you are storing data and want to trigger crawls to the dashboard. This has no effect if `storageless` is set.
diff --git a/mkdocs.yml b/mkdocs.yml
index 10029a126..8d11351ea 100644
--- a/mkdocs.yml
+++ b/mkdocs.yml
@@ -178,6 +178,7 @@ nav:
           - PG RAG Search: 'tools/PGSearchTool.md'
           - Scrape Website: 'tools/ScrapeWebsiteTool.md'
           - Selenium Scraper: 'tools/SeleniumScrapingTool.md'
+          - Spider Scraper: 'tools/SpiderTool.md'
           - TXT RAG Search: 'tools/TXTSearchTool.md'
          - Vision Tool: 'tools/VisionTool.md'
           - Website RAG Search: 'tools/WebsiteSearchTool.md'
diff --git a/src/crewai/agent.py b/src/crewai/agent.py
index b1f806973..e0b193a01 100644
--- a/src/crewai/agent.py
+++ b/src/crewai/agent.py
@@ -114,40 +114,40 @@ class Agent(BaseAgent):
     )
 
     @model_validator(mode="after")
-    def set_agent_ops_agent_name(self) -> "Agent":
-        """Set agent ops agent name."""
+    def post_init_setup(self):
         self.agent_ops_agent_name = self.role
-        return self
 
-    @model_validator(mode="after")
-    def set_agent_executor(self) -> "Agent":
-        """Ensure agent executor and token process are set."""
         if hasattr(self.llm, "model_name"):
-            token_handler = TokenCalcHandler(self.llm.model_name, self._token_process)
-
-            # Ensure self.llm.callbacks is a list
-            if not isinstance(self.llm.callbacks, list):
-                self.llm.callbacks = []
-
-            # Check if an instance of TokenCalcHandler already exists in the list
-            if not any(
-                isinstance(handler, TokenCalcHandler) for handler in self.llm.callbacks
-            ):
-                self.llm.callbacks.append(token_handler)
-
-            if agentops and not any(
-                isinstance(handler, agentops.LangchainCallbackHandler)
-                for handler in self.llm.callbacks
-            ):
-                agentops.stop_instrumenting()
-                self.llm.callbacks.append(agentops.LangchainCallbackHandler())
+            self._setup_llm_callbacks()
 
         if not self.agent_executor:
-            if not self.cache_handler:
-                self.cache_handler = CacheHandler()
-            self.set_cache_handler(self.cache_handler)
+            self._setup_agent_executor()
+
         return self
+
+    def _setup_llm_callbacks(self):
+        token_handler = TokenCalcHandler(self.llm.model_name, self._token_process)
+
+        if not isinstance(self.llm.callbacks, list):
+            self.llm.callbacks = []
+
+        if not any(
+            isinstance(handler, TokenCalcHandler) for handler in self.llm.callbacks
+        ):
+            self.llm.callbacks.append(token_handler)
+
+        if agentops and not any(
+            isinstance(handler, agentops.LangchainCallbackHandler)
+            for handler in self.llm.callbacks
+        ):
+            agentops.stop_instrumenting()
+            self.llm.callbacks.append(agentops.LangchainCallbackHandler())
+
+    def _setup_agent_executor(self):
+        if not self.cache_handler:
+            self.cache_handler = CacheHandler()
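+        # Register the handler so the agent's tools and executor share one cache.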
+        self.set_cache_handler(self.cache_handler)
+
     def execute_task(
         self,
         task: Any,
diff --git a/src/crewai/agents/agent_builder/base_agent.py b/src/crewai/agents/agent_builder/base_agent.py
index 0c0ebcef5..8cfcf6ebc 100644
--- a/src/crewai/agents/agent_builder/base_agent.py
+++ b/src/crewai/agents/agent_builder/base_agent.py
@@ -19,6 +19,7 @@ from crewai.agents.agent_builder.utilities.base_token_process import TokenProces
 from crewai.agents.cache.cache_handler import CacheHandler
 from crewai.agents.tools_handler import ToolsHandler
 from crewai.utilities import I18N, Logger, RPMController
+from crewai.utilities.config import process_config
 
 T = TypeVar("T", bound="BaseAgent")
 
@@ -87,12 +88,12 @@ class BaseAgent(ABC, BaseModel):
     role: str = Field(description="Role of the agent")
     goal: str = Field(description="Objective of the agent")
     backstory: str = Field(description="Backstory of the agent")
+    config: Optional[Dict[str, Any]] = Field(
+        description="Configuration for the agent", default=None, exclude=True
+    )
     cache: bool = Field(
         default=True, description="Whether the agent should use a cache for tool usage."
     )
-    config: Optional[Dict[str, Any]] = Field(
-        description="Configuration for the agent", default=None
-    )
     verbose: bool = Field(
         default=False, description="Verbose mode for the Agent Execution"
     )
@@ -127,11 +128,29 @@ class BaseAgent(ABC, BaseModel):
         default=None, description="Maximum number of tokens for the agent's execution."
     )
 
+    @model_validator(mode="before")
+    @classmethod
+    def process_model_config(cls, values):
+        return process_config(values, cls)
+
     @model_validator(mode="after")
-    def set_config_attributes(self):
-        if self.config:
-            for key, value in self.config.items():
-                setattr(self, key, value)
+    def validate_and_set_attributes(self):
+        # Validate required fields
+        for field in ["role", "goal", "backstory"]:
+            if getattr(self, field) is None:
+                raise ValueError(
+                    f"{field} must be provided either directly or through config"
+                )
+
+        # Set private attributes
+        self._logger = Logger(verbose=self.verbose)
+        if self.max_rpm and not self._rpm_controller:
+            self._rpm_controller = RPMController(
+                max_rpm=self.max_rpm, logger=self._logger
+            )
+        if not self._token_process:
+            self._token_process = TokenProcess()
+
         return self
 
     @field_validator("id", mode="before")
@@ -142,14 +161,6 @@ class BaseAgent(ABC, BaseModel):
             "may_not_set_field", "This field is not to be set by the user.", {}
         )
 
-    @model_validator(mode="after")
-    def set_attributes_based_on_config(self) -> "BaseAgent":
-        """Set attributes based on the agent configuration."""
-        if self.config:
-            for key, value in self.config.items():
-                setattr(self, key, value)
-        return self
-
     @model_validator(mode="after")
     def set_private_attrs(self):
         """Set private attributes."""
diff --git a/src/crewai/cli/cli.py b/src/crewai/cli/cli.py
index 54cda8dbf..ba200f028 100644
--- a/src/crewai/cli/cli.py
+++ b/src/crewai/cli/cli.py
@@ -12,6 +12,7 @@ from crewai.memory.storage.kickoff_task_outputs_storage import (
 from .authentication.main import AuthenticationCommand
 from .deploy.main import DeployCommand
 from .evaluate_crew import evaluate_crew
+from .install_crew import install_crew
 from .replay_from_task import replay_task_command
 from .reset_memories_command import reset_memories_command
 from .run_crew import run_crew
@@ -169,10 +170,16 @@ def test(n_iterations: int, model: str):
     evaluate_crew(n_iterations, model)
 
 
+@crewai.command()
+def install():
+    """Install the Crew."""
+    install_crew()
+
+
 @crewai.command()
 def run():
crew.""" - click.echo("Running the crew") + """Run the Crew.""" + click.echo("Running the Crew") run_crew() diff --git a/src/crewai/cli/install_crew.py b/src/crewai/cli/install_crew.py new file mode 100644 index 000000000..bbafdad6f --- /dev/null +++ b/src/crewai/cli/install_crew.py @@ -0,0 +1,21 @@ +import subprocess + +import click + + +def install_crew() -> None: + """ + Install the crew by running the Poetry command to lock and install. + """ + try: + subprocess.run(["poetry", "lock"], check=True, capture_output=False, text=True) + subprocess.run( + ["poetry", "install"], check=True, capture_output=False, text=True + ) + + except subprocess.CalledProcessError as e: + click.echo(f"An error occurred while running the crew: {e}", err=True) + click.echo(e.output, err=True) + + except Exception as e: + click.echo(f"An unexpected error occurred: {e}", err=True) diff --git a/src/crewai/cli/templates/crew/README.md b/src/crewai/cli/templates/crew/README.md index 0914be209..5b4f02e06 100644 --- a/src/crewai/cli/templates/crew/README.md +++ b/src/crewai/cli/templates/crew/README.md @@ -14,12 +14,9 @@ pip install poetry Next, navigate to your project directory and install the dependencies: -1. First lock the dependencies and then install them: +1. First lock the dependencies and install them by using the CLI command: ```bash -poetry lock -``` -```bash -poetry install +crewai install ``` ### Customizing @@ -37,10 +34,6 @@ To kickstart your crew of AI agents and begin task execution, run this from the ```bash $ crewai run ``` -or -```bash -poetry run {{folder_name}} -``` This command initializes the {{name}} Crew, assembling the agents and assigning them tasks as defined in your configuration. diff --git a/src/crewai/cli/templates/pipeline/README.md b/src/crewai/cli/templates/pipeline/README.md index 3bb1bef6c..433e43abe 100644 --- a/src/crewai/cli/templates/pipeline/README.md +++ b/src/crewai/cli/templates/pipeline/README.md @@ -17,11 +17,7 @@ Next, navigate to your project directory and install the dependencies: 1. First lock the dependencies and then install them: ```bash -poetry lock -``` - -```bash -poetry install +crewai install ``` ### Customizing @@ -38,7 +34,7 @@ poetry install To kickstart your crew of AI agents and begin task execution, run this from the root folder of your project: ```bash -poetry run {{folder_name}} +crewai run ``` This command initializes the {{name}} Crew, assembling the agents and assigning them tasks as defined in your configuration. diff --git a/src/crewai/cli/templates/pipeline_router/README.md b/src/crewai/cli/templates/pipeline_router/README.md index 60dc617e9..d710c341a 100644 --- a/src/crewai/cli/templates/pipeline_router/README.md +++ b/src/crewai/cli/templates/pipeline_router/README.md @@ -16,10 +16,7 @@ Next, navigate to your project directory and install the dependencies: 1. First lock the dependencies and then install them: ```bash -poetry lock -``` -```bash -poetry install +crewai install ``` ### Customizing @@ -35,7 +32,7 @@ poetry install To kickstart your crew of AI agents and begin task execution, run this from the root folder of your project: ```bash -poetry run {{folder_name}} +crewai run ``` This command initializes the {{name}} Crew, assembling the agents and assigning them tasks as defined in your configuration. 
diff --git a/src/crewai/project/annotations.py b/src/crewai/project/annotations.py
index 030341c32..fefbad884 100644
--- a/src/crewai/project/annotations.py
+++ b/src/crewai/project/annotations.py
@@ -1,3 +1,5 @@
+from functools import wraps
+
 from crewai.project.utils import memoize
 
 
@@ -5,21 +7,17 @@ def task(func):
     if not hasattr(task, "registration_order"):
         task.registration_order = []
 
-    func.is_task = True
-    memoized_func = memoize(func)
-
-    # Append the function name to the registration order list
-    task.registration_order.append(func.__name__)
-
+    @wraps(func)
     def wrapper(*args, **kwargs):
-        result = memoized_func(*args, **kwargs)
-
+        result = func(*args, **kwargs)
         if not result.name:
             result.name = func.__name__
-
         return result
 
-    return wrapper
+    setattr(wrapper, "is_task", True)
+    task.registration_order.append(func.__name__)
+
+    return memoize(wrapper)
 
 
 def agent(func):
diff --git a/src/crewai/task.py b/src/crewai/task.py
index d00e2cc49..ea292772a 100644
--- a/src/crewai/task.py
+++ b/src/crewai/task.py
@@ -23,6 +23,7 @@ from crewai.agents.agent_builder.base_agent import BaseAgent
 from crewai.tasks.output_format import OutputFormat
 from crewai.tasks.task_output import TaskOutput
 from crewai.telemetry.telemetry import Telemetry
+from crewai.utilities.config import process_config
 from crewai.utilities.converter import Converter, convert_to_model
 from crewai.utilities.i18n import I18N
 
@@ -115,6 +116,21 @@ class Task(BaseModel):
     _thread: Optional[threading.Thread] = PrivateAttr(default=None)
     _execution_time: Optional[float] = PrivateAttr(default=None)
 
+    @model_validator(mode="before")
+    @classmethod
+    def process_model_config(cls, values):
+        return process_config(values, cls)
+
+    @model_validator(mode="after")
+    def validate_required_fields(self):
+        required_fields = ["description", "expected_output"]
+        for field in required_fields:
+            if getattr(self, field) is None:
+                raise ValueError(
+                    f"{field} must be provided either directly or through config"
+                )
+        return self
+
     @field_validator("id", mode="before")
     @classmethod
     def _deny_user_set_id(cls, v: Optional[UUID4]) -> None:
diff --git a/src/crewai/utilities/config.py b/src/crewai/utilities/config.py
new file mode 100644
index 000000000..56a59ce1b
--- /dev/null
+++ b/src/crewai/utilities/config.py
@@ -0,0 +1,40 @@
+from typing import Any, Dict, Type
+
+from pydantic import BaseModel
+
+
+def process_config(
+    values: Dict[str, Any], model_class: Type[BaseModel]
+) -> Dict[str, Any]:
+    """
+    Process the config dictionary and update the values accordingly.
+
+    Args:
+        values (Dict[str, Any]): The dictionary of values to update.
+        model_class (Type[BaseModel]): The Pydantic model class to reference for field validation.
+
+    Returns:
+        Dict[str, Any]: The updated values dictionary.
+    """
+    config = values.get("config", {})
+    if not config:
+        return values
+
+    # Copy values from config (originally from YAML) to the model's attributes.
+    # Only copy if the attribute isn't already set, preserving any explicitly defined values.
+    for key, value in config.items():
+        if key not in model_class.model_fields:
+            continue
+        if values.get(key) is not None:
+            continue
+        if isinstance(value, (str, int, float, bool, list)):
+            values[key] = value
+        elif isinstance(value, dict):
+            if isinstance(values.get(key), dict):
+                values[key].update(value)
+            else:
+                values[key] = value
+
+    # Remove the config from values to avoid duplicate processing
+    values.pop("config", None)
+    return values
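
For context on the reworked `@task` decorator in `src/crewai/project/annotations.py`, here is a minimal sketch of how it behaves inside a `@CrewBase`-style crew class (the class and field values are illustrative, not part of this patch):

```python
from crewai import Task
from crewai.project import CrewBase, task


@CrewBase
class ResearchCrew:
    @task
    def research(self) -> Task:
        # No explicit name is given, so the wrapper falls back to the
        # function name: result.name becomes "research". The decorator also
        # records "research" in task.registration_order and memoizes the
        # wrapped function so repeated calls return the same Task.
        return Task(
            description="Collect recent sources on a topic",
            expected_output="A bullet list of URLs",
        )
```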
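Similarly, a usage sketch for the new `process_config` helper: because it runs in the `mode="before"` validators added to `BaseAgent` and `Task`, a `config` dict (typically parsed from YAML) fills in any fields not passed explicitly (the field values below are illustrative):

```python
from crewai import Agent

config = {  # e.g. parsed from an agents.yaml entry
    "role": "Researcher",
    "goal": "Find and summarize sources",
    "backstory": "A meticulous analyst",
}

# Explicit kwargs win over config: role stays "Lead Researcher",
# while goal and backstory are copied in from config.
agent = Agent(role="Lead Researcher", config=config)
assert agent.role == "Lead Researcher"
assert agent.goal == "Find and summarize sources"
```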