Compare commits

..

7 Commits

Author SHA1 Message Date
Eduardo Chiarotti
906be132f8 feat: update tasks config main template typos 2024-12-09 11:09:46 -03:00
Eduardo Chiarotti
9a5727b149 feat: remove import due to circular improt 2024-12-09 10:00:31 -03:00
Eduardo Chiarotti
ef3b93ab93 feat: Add ContextualMemory to __init__ 2024-12-09 09:58:15 -03:00
Eduardo Chiarotti
3dbcec9434 docs: Add quotes to agentops installing command 2024-12-09 09:53:03 -03:00
Brandon Hancock (bhancock_ai)
6930b68484 add support for langfuse with litellm (#1721) 2024-12-06 13:57:28 -05:00
Brandon Hancock (bhancock_ai)
c7c0647dd2 drop metadata requirement (#1712)
* drop metadata requirement

* fix linting

* Update docs for new knowledge

* more linting

* more linting

* make save_documents private

* update docs to the new way we use knowledge and include clearing memory
2024-12-05 14:59:52 -05:00
Brandon Hancock (bhancock_ai)
7b276e6797 Incorporate Stale PRs that have feedback (#1693)
* incorporate #1683

* add in --version flag to cli. closes #1679.

* Fix env issue

* Add in suggestions from @caike to make sure ragstorage doesnt exceed os file limit. Also, included additional checks to support windows.

* remove poetry.lock as pointed out by @sanders41 in #1574.

* Incorporate feedback from crewai reviewer

* Incorporate @lorenzejay feedback
2024-12-05 12:17:23 -05:00
20 changed files with 148 additions and 100 deletions

View File

@@ -48,7 +48,6 @@ from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSourc
content = "Users name is John. He is 30 years old and lives in San Francisco."
string_source = StringKnowledgeSource(
content=content,
metadata={"preference": "personal"}
)
# Create an LLM with a temperature of 0 to ensure deterministic outputs
@@ -74,10 +73,7 @@ crew = Crew(
tasks=[task],
verbose=True,
process=Process.sequential,
knowledge={
"sources": [string_source],
"metadata": {"preference": "personal"}
}, # Enable knowledge by adding the sources here. You can also add more sources to the sources list.
knowledge_sources=[string_source], # Enable knowledge by adding the sources here. You can also add more sources to the sources list.
)
result = crew.kickoff(inputs={"question": "What city does John live in and how old is he?"})
@@ -85,17 +81,6 @@ result = crew.kickoff(inputs={"question": "What city does John live in and how o
## Knowledge Configuration
### Metadata and Filtering
Knowledge sources support metadata for better organization and filtering. Metadata is used to filter the knowledge sources when querying the knowledge store.
```python Code
knowledge_source = StringKnowledgeSource(
content="Users name is John. He is 30 years old and lives in San Francisco.",
metadata={"preference": "personal"} # Metadata is used to filter the knowledge sources
)
```
### Chunking Configuration
Control how content is split for processing by setting the chunk size and overlap.
@@ -116,21 +101,28 @@ You can also configure the embedder for the knowledge store. This is useful if y
...
string_source = StringKnowledgeSource(
content="Users name is John. He is 30 years old and lives in San Francisco.",
metadata={"preference": "personal"}
)
crew = Crew(
...
knowledge={
"sources": [string_source],
"metadata": {"preference": "personal"},
"embedder_config": {
"provider": "openai", # Default embedder provider; can be "ollama", "gemini", e.t.c.
"config": {"model": "text-embedding-3-small"} # Default embedder model; can be "mxbai-embed-large", "nomic-embed-tex", e.t.c.
},
knowledge_sources=[string_source],
embedder={
"provider": "openai",
"config": {"model": "text-embedding-3-small"},
},
)
```
## Clearing Knowledge
If you need to clear the knowledge stored in CrewAI, you can use the `crewai reset-memories` command with the `--knowledge` option.
```bash Command
crewai reset-memories --knowledge
```
This is useful when you've updated your knowledge sources and want to ensure that the agents are using the most recent information.
## Custom Knowledge Sources
CrewAI allows you to create custom knowledge sources for any type of data by extending the `BaseKnowledgeSource` class. Let's create a practical example that fetches and processes space news articles.
@@ -174,12 +166,12 @@ class SpaceNewsKnowledgeSource(BaseKnowledgeSource):
formatted = "Space News Articles:\n\n"
for article in articles:
formatted += f"""
Title: {article['title']}
Published: {article['published_at']}
Summary: {article['summary']}
News Site: {article['news_site']}
URL: {article['url']}
-------------------"""
Title: {article['title']}
Published: {article['published_at']}
Summary: {article['summary']}
News Site: {article['news_site']}
URL: {article['url']}
-------------------"""
return formatted
def add(self) -> None:
@@ -189,17 +181,12 @@ URL: {article['url']}
chunks = self._chunk_text(text)
self.chunks.extend(chunks)
self.save_documents(metadata={
"source": "space_news_api",
"timestamp": datetime.now().isoformat(),
"article_count": self.limit
})
self._save_documents()
# Create knowledge source
recent_news = SpaceNewsKnowledgeSource(
api_endpoint="https://api.spaceflightnewsapi.net/v4/articles",
limit=10,
metadata={"category": "recent_news", "source": "spaceflight_news"}
)
# Create specialized agent
@@ -265,7 +252,7 @@ The latest developments in space exploration, based on recent space news article
- Implements three key methods:
- `load_content()`: Fetches articles from the API
- `_format_articles()`: Structures the articles into readable text
- `add()`: Processes and stores the content with metadata
- `add()`: Processes and stores the content
2. **Agent Configuration**:
- Specialized role as a Space News Analyst
@@ -299,14 +286,12 @@ You can customize the API query by modifying the endpoint URL:
recent_news = SpaceNewsKnowledgeSource(
api_endpoint="https://api.spaceflightnewsapi.net/v4/articles",
limit=20, # Increase the number of articles
metadata={"category": "recent_news"}
)
# Add search parameters
recent_news = SpaceNewsKnowledgeSource(
api_endpoint="https://api.spaceflightnewsapi.net/v4/articles?search=NASA", # Search for NASA news
limit=10,
metadata={"category": "nasa_news"}
)
```
@@ -314,16 +299,14 @@ recent_news = SpaceNewsKnowledgeSource(
<AccordionGroup>
<Accordion title="Content Organization">
- Use descriptive metadata for better filtering
- Keep chunk sizes appropriate for your content type
- Consider content overlap for context preservation
- Organize related information into separate knowledge sources
</Accordion>
<Accordion title="Performance Tips">
- Use metadata filtering to narrow search scope
- Adjust chunk sizes based on content complexity
- Configure appropriate embedding models
- Consider using local embedding providers for faster processing
</Accordion>
</AccordionGroup>
</AccordionGroup>

View File

@@ -57,7 +57,7 @@ This feature is useful for debugging and understanding how agents interact with
<Step title="Install AgentOps">
Install AgentOps with:
```bash
pip install crewai[agentops]
pip install 'crewai[agentops]'
```
or
```bash

View File

@@ -8,7 +8,7 @@ from pydantic import Field, InstanceOf, PrivateAttr, model_validator
from crewai.agents import CacheHandler
from crewai.agents.agent_builder.base_agent import BaseAgent
from crewai.agents.crew_agent_executor import CrewAgentExecutor
from crewai.cli.constants import ENV_VARS
from crewai.cli.constants import ENV_VARS, LITELLM_PARAMS
from crewai.knowledge.knowledge import Knowledge
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from crewai.knowledge.utils.knowledge_utils import extract_knowledge_context
@@ -181,20 +181,11 @@ class Agent(BaseAgent):
if key_name and key_name not in unaccepted_attributes:
env_value = os.environ.get(key_name)
if env_value:
# Map key names containing "API_KEY" to "api_key"
key_name = (
"api_key" if "API_KEY" in key_name else key_name
)
# Map key names containing "API_BASE" to "api_base"
key_name = (
"api_base" if "API_BASE" in key_name else key_name
)
# Map key names containing "API_VERSION" to "api_version"
key_name = (
"api_version"
if "API_VERSION" in key_name
else key_name
)
key_name = key_name.lower()
for pattern in LITELLM_PARAMS:
if pattern in key_name:
key_name = pattern
break
llm_params[key_name] = env_value
# Check for default values if the environment variable is not set
elif env_var.get("default", False):

View File

@@ -25,6 +25,7 @@ from .update_crew import update_crew
@click.group()
@click.version_option(pkg_resources.get_distribution("crewai").version)
def crewai():
"""Top-level command group for crewai."""
@@ -50,7 +51,10 @@ def create(type, name, provider, skip_provider=False):
)
def version(tools):
"""Show the installed version of crewai."""
crewai_version = pkg_resources.get_distribution("crewai").version
try:
crewai_version = pkg_resources.get_distribution("crewai").version
except Exception:
crewai_version = "unknown version"
click.echo(f"crewai version: {crewai_version}")
if tools:

View File

@@ -159,3 +159,6 @@ MODELS = {
}
JSON_URL = "https://raw.githubusercontent.com/BerriAI/litellm/main/model_prices_and_context_window.json"
LITELLM_PARAMS = ["api_key", "api_base", "api_version"]

View File

@@ -12,6 +12,6 @@ reporting_task:
Review the context you got and expand each topic into a full section for a report.
Make sure the report is detailed and contains any and all relevant information.
expected_output: >
A fully fledge reports with the mains topics, each with a full section of information.
A fully fledged report with the main topics, each with a full section of information.
Formatted as markdown without '```'
agent: reporting_analyst

View File

@@ -1,11 +1,10 @@
import os
from typing import Any, Dict, List, Optional
from typing import List, Optional, Dict, Any
from pydantic import BaseModel, ConfigDict, Field
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
from crewai.utilities.constants import DEFAULT_SCORE_THRESHOLD
os.environ["TOKENIZERS_PARALLELISM"] = "false" # removes logging from fastembed
@@ -46,9 +45,7 @@ class Knowledge(BaseModel):
source.storage = self.storage
source.add()
def query(
self, query: List[str], limit: int = 3, preference: Optional[str] = None
) -> List[Dict[str, Any]]:
def query(self, query: List[str], limit: int = 3) -> List[Dict[str, Any]]:
"""
Query across all knowledge sources to find the most relevant information.
Returns the top_k most relevant chunks.
@@ -57,8 +54,6 @@ class Knowledge(BaseModel):
results = self.storage.search(
query,
limit,
filter={"preference": preference} if preference else None,
score_threshold=DEFAULT_SCORE_THRESHOLD,
)
return results

View File

@@ -1,13 +1,13 @@
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Union, List, Dict, Any
from typing import Dict, List, Union
from pydantic import Field
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from crewai.utilities.logger import Logger
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
from crewai.utilities.constants import KNOWLEDGE_DIRECTORY
from crewai.utilities.logger import Logger
class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
@@ -49,10 +49,9 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
color="red",
)
def save_documents(self, metadata: Dict[str, Any]):
def _save_documents(self):
"""Save the documents to the storage."""
chunk_metadatas = [metadata.copy() for _ in self.chunks]
self.storage.save(self.chunks, chunk_metadatas)
self.storage.save(self.chunks)
def convert_to_path(self, path: Union[Path, str]) -> Path:
"""Convert a path to a Path object."""

View File

@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from typing import List, Dict, Any, Optional
from typing import Any, Dict, List, Optional
import numpy as np
from pydantic import BaseModel, ConfigDict, Field
@@ -17,7 +17,7 @@ class BaseKnowledgeSource(BaseModel, ABC):
model_config = ConfigDict(arbitrary_types_allowed=True)
storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
metadata: Dict[str, Any] = Field(default_factory=dict)
metadata: Dict[str, Any] = Field(default_factory=dict) # Currently unused
collection_name: Optional[str] = Field(default=None)
@abstractmethod
@@ -41,9 +41,9 @@ class BaseKnowledgeSource(BaseModel, ABC):
for i in range(0, len(text), self.chunk_size - self.chunk_overlap)
]
def save_documents(self, metadata: Dict[str, Any]):
def _save_documents(self):
"""
Save the documents to the storage.
This method should be called after the chunks and embeddings are generated.
"""
self.storage.save(self.chunks, metadata)
self.storage.save(self.chunks)

View File

@@ -1,6 +1,6 @@
import csv
from typing import Dict, List
from pathlib import Path
from typing import Dict, List
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
@@ -30,7 +30,7 @@ class CSVKnowledgeSource(BaseFileKnowledgeSource):
)
new_chunks = self._chunk_text(content_str)
self.chunks.extend(new_chunks)
self.save_documents(metadata=self.metadata)
self._save_documents()
def _chunk_text(self, text: str) -> List[str]:
"""Utility method to split text into chunks."""

View File

@@ -1,5 +1,6 @@
from typing import Dict, List
from pathlib import Path
from typing import Dict, List
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
@@ -44,7 +45,7 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
new_chunks = self._chunk_text(content_str)
self.chunks.extend(new_chunks)
self.save_documents(metadata=self.metadata)
self._save_documents()
def _chunk_text(self, text: str) -> List[str]:
"""Utility method to split text into chunks."""

View File

@@ -1,6 +1,6 @@
import json
from typing import Any, Dict, List
from pathlib import Path
from typing import Any, Dict, List
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
@@ -42,7 +42,7 @@ class JSONKnowledgeSource(BaseFileKnowledgeSource):
)
new_chunks = self._chunk_text(content_str)
self.chunks.extend(new_chunks)
self.save_documents(metadata=self.metadata)
self._save_documents()
def _chunk_text(self, text: str) -> List[str]:
"""Utility method to split text into chunks."""

View File

@@ -1,5 +1,5 @@
from typing import List, Dict
from pathlib import Path
from typing import Dict, List
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
@@ -43,7 +43,7 @@ class PDFKnowledgeSource(BaseFileKnowledgeSource):
for _, text in self.content.items():
new_chunks = self._chunk_text(text)
self.chunks.extend(new_chunks)
self.save_documents(metadata=self.metadata)
self._save_documents()
def _chunk_text(self, text: str) -> List[str]:
"""Utility method to split text into chunks."""

View File

@@ -24,7 +24,7 @@ class StringKnowledgeSource(BaseKnowledgeSource):
"""Add string content to the knowledge source, chunk it, compute embeddings, and save them."""
new_chunks = self._chunk_text(self.content)
self.chunks.extend(new_chunks)
self.save_documents(metadata=self.metadata)
self._save_documents()
def _chunk_text(self, text: str) -> List[str]:
"""Utility method to split text into chunks."""

View File

@@ -1,5 +1,5 @@
from typing import Dict, List
from pathlib import Path
from typing import Dict, List
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
@@ -24,7 +24,7 @@ class TextFileKnowledgeSource(BaseFileKnowledgeSource):
for _, text in self.content.items():
new_chunks = self._chunk_text(text)
self.chunks.extend(new_chunks)
self.save_documents(metadata=self.metadata)
self._save_documents()
def _chunk_text(self, text: str) -> List[str]:
"""Utility method to split text into chunks."""

View File

@@ -1,18 +1,20 @@
import contextlib
import hashlib
import io
import logging
import chromadb
import os
from typing import Any, Dict, List, Optional, Union, cast
import chromadb
import chromadb.errors
from crewai.utilities.paths import db_storage_path
from typing import Optional, List, Dict, Any, Union
from crewai.utilities import EmbeddingConfigurator
from crewai.knowledge.storage.base_knowledge_storage import BaseKnowledgeStorage
import hashlib
from chromadb.config import Settings
from chromadb.api import ClientAPI
from chromadb.api.types import OneOrMany
from chromadb.config import Settings
from crewai.knowledge.storage.base_knowledge_storage import BaseKnowledgeStorage
from crewai.utilities import EmbeddingConfigurator
from crewai.utilities.logger import Logger
from crewai.utilities.paths import db_storage_path
@contextlib.contextmanager
@@ -116,11 +118,16 @@ class KnowledgeStorage(BaseKnowledgeStorage):
def save(
self,
documents: List[str],
metadata: Union[Dict[str, Any], List[Dict[str, Any]]],
metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
):
if self.collection:
try:
metadatas = [metadata] if isinstance(metadata, dict) else metadata
if metadata is None:
metadatas: Optional[OneOrMany[chromadb.Metadata]] = None
elif isinstance(metadata, list):
metadatas = [cast(chromadb.Metadata, m) for m in metadata]
else:
metadatas = cast(chromadb.Metadata, metadata)
ids = [
hashlib.sha256(doc.encode("utf-8")).hexdigest() for doc in documents

View File

@@ -1,4 +1,5 @@
import logging
import os
import sys
import threading
import warnings
@@ -128,6 +129,7 @@ class LLM:
litellm.drop_params = True
litellm.set_verbose = False
self.set_callbacks(callbacks)
self.set_env_callbacks()
def call(self, messages: List[Dict[str, str]], callbacks: List[Any] = []) -> str:
with suppress_warnings():
@@ -202,3 +204,39 @@ class LLM:
litellm._async_success_callback.remove(callback)
litellm.callbacks = callbacks
def set_env_callbacks(self):
"""
Sets the success and failure callbacks for the LiteLLM library from environment variables.
This method reads the `LITELLM_SUCCESS_CALLBACKS` and `LITELLM_FAILURE_CALLBACKS`
environment variables, which should contain comma-separated lists of callback names.
It then assigns these lists to `litellm.success_callback` and `litellm.failure_callback`,
respectively.
If the environment variables are not set or are empty, the corresponding callback lists
will be set to empty lists.
Example:
LITELLM_SUCCESS_CALLBACKS="langfuse,langsmith"
LITELLM_FAILURE_CALLBACKS="langfuse"
This will set `litellm.success_callback` to ["langfuse", "langsmith"] and
`litellm.failure_callback` to ["langfuse"].
"""
success_callbacks_str = os.environ.get("LITELLM_SUCCESS_CALLBACKS", "")
success_callbacks = []
if success_callbacks_str:
success_callbacks = [
callback.strip() for callback in success_callbacks_str.split(",")
]
failure_callbacks_str = os.environ.get("LITELLM_FAILURE_CALLBACKS", "")
failure_callbacks = []
if failure_callbacks_str:
failure_callbacks = [
callback.strip() for callback in failure_callbacks_str.split(",")
]
litellm.success_callback = success_callbacks
litellm.failure_callback = failure_callbacks

View File

@@ -4,12 +4,14 @@ import logging
import os
import shutil
import uuid
from typing import Any, Dict, List, Optional
from chromadb.api import ClientAPI
from crewai.memory.storage.base_rag_storage import BaseRAGStorage
from crewai.utilities.paths import db_storage_path
from crewai.utilities import EmbeddingConfigurator
from crewai.utilities.constants import MAX_FILE_NAME_LENGTH
from crewai.utilities.paths import db_storage_path
@contextlib.contextmanager
@@ -37,12 +39,15 @@ class RAGStorage(BaseRAGStorage):
app: ClientAPI | None = None
def __init__(self, type, allow_reset=True, embedder_config=None, crew=None, path=None):
def __init__(
self, type, allow_reset=True, embedder_config=None, crew=None, path=None
):
super().__init__(type, allow_reset, embedder_config, crew)
agents = crew.agents if crew else []
agents = [self._sanitize_role(agent.role) for agent in agents]
agents = "_".join(agents)
self.agents = agents
self.storage_file_name = self._build_storage_file_name(type, agents)
self.type = type
@@ -60,7 +65,7 @@ class RAGStorage(BaseRAGStorage):
self._set_embedder_config()
chroma_client = chromadb.PersistentClient(
path=self.path if self.path else f"{db_storage_path()}/{self.type}/{self.agents}",
path=self.path if self.path else self.storage_file_name,
settings=Settings(allow_reset=self.allow_reset),
)
@@ -81,6 +86,20 @@ class RAGStorage(BaseRAGStorage):
"""
return role.replace("\n", "").replace(" ", "_").replace("/", "_")
def _build_storage_file_name(self, type: str, file_name: str) -> str:
"""
Ensures file name does not exceed max allowed by OS
"""
base_path = f"{db_storage_path()}/{type}"
if len(file_name) > MAX_FILE_NAME_LENGTH:
logging.warning(
f"Trimming file name from {len(file_name)} to {MAX_FILE_NAME_LENGTH} characters."
)
file_name = file_name[:MAX_FILE_NAME_LENGTH]
return f"{base_path}/{file_name}"
def save(self, value: Any, metadata: Dict[str, Any]) -> None:
if not hasattr(self, "app") or not hasattr(self, "collection"):
self._initialize_app()

View File

@@ -3,3 +3,4 @@ TRAINED_AGENTS_DATA_FILE = "trained_agents_data.pkl"
DEFAULT_SCORE_THRESHOLD = 0.35
KNOWLEDGE_DIRECTORY = "knowledge"
MAX_LLM_RETRY = 3
MAX_FILE_NAME_LENGTH = 255

View File

@@ -131,6 +131,13 @@ def test_reset_no_memory_flags(runner):
)
def test_version_flag(runner):
result = runner.invoke(version)
assert result.exit_code == 0
assert "crewai version:" in result.output
def test_version_command(runner):
result = runner.invoke(version)