Compare commits


1 Commit

| Author | SHA1 | Message | Date |
| --- | --- | --- | --- |
| Gui Vieira | 349b67e5ce | Fix crewai-tools version | 2024-12-05 14:31:14 -03:00 |
16 changed files with 84 additions and 107 deletions

View File

@@ -48,6 +48,7 @@ from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSourc
content = "Users name is John. He is 30 years old and lives in San Francisco."
string_source = StringKnowledgeSource(
content=content,
metadata={"preference": "personal"}
)
# Create an LLM with a temperature of 0 to ensure deterministic outputs
@@ -73,7 +74,10 @@ crew = Crew(
tasks=[task],
verbose=True,
process=Process.sequential,
knowledge_sources=[string_source], # Enable knowledge by adding the sources here. You can also add more sources to the sources list.
knowledge={
"sources": [string_source],
"metadata": {"preference": "personal"}
}, # Enable knowledge by adding the sources here. You can also add more sources to the sources list.
)
result = crew.kickoff(inputs={"question": "What city does John live in and how old is he?"})
@@ -81,6 +85,17 @@ result = crew.kickoff(inputs={"question": "What city does John live in and how o
## Knowledge Configuration
### Metadata and Filtering
Knowledge sources support metadata for better organization and filtering. Metadata is used to filter the knowledge sources when querying the knowledge store.
```python Code
knowledge_source = StringKnowledgeSource(
content="Users name is John. He is 30 years old and lives in San Francisco.",
metadata={"preference": "personal"} # Metadata is used to filter the knowledge sources
)
```
### Chunking Configuration
Control how content is split for processing by setting the chunk size and overlap.
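For instance, a minimal sketch using the `chunk_size` and `chunk_overlap` fields that `_chunk_text` reads later in this diff; the values shown are illustrative, not documented defaults:
```python Code
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource

# Illustrative chunking configuration; the field names match the
# base_knowledge_source hunk further down, the values are examples only.
knowledge_source = StringKnowledgeSource(
    content="Long content...",
    chunk_size=4000,      # maximum characters per chunk
    chunk_overlap=200     # characters shared between adjacent chunks
)
```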
@@ -101,28 +116,21 @@ You can also configure the embedder for the knowledge store. This is useful if y
...
string_source = StringKnowledgeSource(
content="Users name is John. He is 30 years old and lives in San Francisco.",
metadata={"preference": "personal"}
)
crew = Crew(
...
knowledge_sources=[string_source],
embedder={
"provider": "openai",
"config": {"model": "text-embedding-3-small"},
knowledge={
"sources": [string_source],
"metadata": {"preference": "personal"},
"embedder_config": {
"provider": "openai", # Default embedder provider; can be "ollama", "gemini", e.t.c.
"config": {"model": "text-embedding-3-small"} # Default embedder model; can be "mxbai-embed-large", "nomic-embed-tex", e.t.c.
},
},
)
```
## Clearing Knowledge
If you need to clear the knowledge stored in CrewAI, you can use the `crewai reset-memories` command with the `--knowledge` option.
```bash Command
crewai reset-memories --knowledge
```
This is useful when you've updated your knowledge sources and want to ensure that the agents are using the most recent information.
## Custom Knowledge Sources
CrewAI allows you to create custom knowledge sources for any type of data by extending the `BaseKnowledgeSource` class. Let's create a practical example that fetches and processes space news articles.
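As a quick orientation before the diff fragments below, here is a minimal sketch of the subclass contract the space-news example follows (`load_content()` plus `add()`); the class name and endpoint handling are hypothetical, and `requests` is an assumed dependency:
```python Code
import requests
from typing import Any, Dict
from pydantic import Field
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource

class APIKnowledgeSource(BaseKnowledgeSource):
    """Hypothetical custom source sketch; not part of this diff."""
    api_endpoint: str = Field(description="URL to fetch content from")
    limit: int = Field(default=10, description="Maximum items to fetch")

    def load_content(self) -> Dict[Any, str]:
        # Fetch raw text keyed by a source identifier.
        response = requests.get(self.api_endpoint, params={"limit": self.limit})
        response.raise_for_status()
        return {self.api_endpoint: str(response.json())}

    def add(self) -> None:
        # Chunk the fetched text and persist it with this source's metadata,
        # mirroring the save_documents(metadata=...) pattern in this diff.
        for _, text in self.load_content().items():
            self.chunks.extend(self._chunk_text(text))
        self.save_documents(metadata=self.metadata)
```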
@@ -166,12 +174,12 @@ class SpaceNewsKnowledgeSource(BaseKnowledgeSource):
formatted = "Space News Articles:\n\n"
for article in articles:
formatted += f"""
Title: {article['title']}
Published: {article['published_at']}
Summary: {article['summary']}
News Site: {article['news_site']}
URL: {article['url']}
-------------------"""
Title: {article['title']}
Published: {article['published_at']}
Summary: {article['summary']}
News Site: {article['news_site']}
URL: {article['url']}
-------------------"""
return formatted
def add(self) -> None:
@@ -181,12 +189,17 @@ class SpaceNewsKnowledgeSource(BaseKnowledgeSource):
chunks = self._chunk_text(text)
self.chunks.extend(chunks)
self._save_documents()
self.save_documents(metadata={
"source": "space_news_api",
"timestamp": datetime.now().isoformat(),
"article_count": self.limit
})
# Create knowledge source
recent_news = SpaceNewsKnowledgeSource(
api_endpoint="https://api.spaceflightnewsapi.net/v4/articles",
limit=10,
metadata={"category": "recent_news", "source": "spaceflight_news"}
)
# Create specialized agent
@@ -252,7 +265,7 @@ The latest developments in space exploration, based on recent space news article
- Implements three key methods:
- `load_content()`: Fetches articles from the API
- `_format_articles()`: Structures the articles into readable text
- `add()`: Processes and stores the content
- `add()`: Processes and stores the content with metadata
2. **Agent Configuration**:
- Specialized role as a Space News Analyst
@@ -286,12 +299,14 @@ You can customize the API query by modifying the endpoint URL:
recent_news = SpaceNewsKnowledgeSource(
api_endpoint="https://api.spaceflightnewsapi.net/v4/articles",
limit=20, # Increase the number of articles
metadata={"category": "recent_news"}
)
# Add search parameters
recent_news = SpaceNewsKnowledgeSource(
api_endpoint="https://api.spaceflightnewsapi.net/v4/articles?search=NASA", # Search for NASA news
limit=10,
metadata={"category": "nasa_news"}
)
```
@@ -299,14 +314,16 @@ recent_news = SpaceNewsKnowledgeSource(
<AccordionGroup>
<Accordion title="Content Organization">
- Use descriptive metadata for better filtering
- Keep chunk sizes appropriate for your content type
- Consider content overlap for context preservation
- Organize related information into separate knowledge sources
</Accordion>
<Accordion title="Performance Tips">
- Use metadata filtering to narrow search scope
- Adjust chunk sizes based on content complexity
- Configure appropriate embedding models
- Consider using local embedding providers for faster processing
</Accordion>
</AccordionGroup>

View File

@@ -57,7 +57,7 @@ This feature is useful for debugging and understanding how agents interact with
<Step title="Install AgentOps">
Install AgentOps with:
```bash
pip install 'crewai[agentops]'
pip install crewai[agentops]
```
or
```bash

View File

@@ -64,7 +64,7 @@ dev-dependencies = [
"mkdocs-material-extensions>=1.3.1",
"pillow>=10.2.0",
"cairosvg>=2.7.1",
"crewai-tools>=0.14.0",
"crewai-tools>=0.17.0,<0.18.0",
"pytest>=8.0.0",
"pytest-vcr>=1.0.2",
"python-dotenv>=1.0.0",

View File

@@ -12,6 +12,6 @@ reporting_task:
Review the context you got and expand each topic into a full section for a report.
Make sure the report is detailed and contains any and all relevant information.
expected_output: >
A fully fledged report with the main topics, each with a full section of information.
A fully fledge reports with the mains topics, each with a full section of information.
Formatted as markdown without '```'
agent: reporting_analyst

View File

@@ -1,10 +1,11 @@
import os
from typing import Any, Dict, List, Optional
from typing import List, Optional, Dict, Any
from pydantic import BaseModel, ConfigDict, Field
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
from crewai.utilities.constants import DEFAULT_SCORE_THRESHOLD
os.environ["TOKENIZERS_PARALLELISM"] = "false" # removes logging from fastembed
@@ -45,7 +46,9 @@ class Knowledge(BaseModel):
source.storage = self.storage
source.add()
def query(self, query: List[str], limit: int = 3) -> List[Dict[str, Any]]:
def query(
self, query: List[str], limit: int = 3, preference: Optional[str] = None
) -> List[Dict[str, Any]]:
"""
Query across all knowledge sources to find the most relevant information.
Returns the top_k most relevant chunks.
@@ -54,6 +57,8 @@ class Knowledge(BaseModel):
results = self.storage.search(
query,
limit,
filter={"preference": preference} if preference else None,
score_threshold=DEFAULT_SCORE_THRESHOLD,
)
return results
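Read together, the new parameter and the storage filter let callers scope a search to one metadata bucket; a hypothetical usage sketch (the `knowledge` instance is assumed):
```python
# Hypothetical call against the new query() signature in this diff:
# `preference` is forwarded to storage.search as filter={"preference": ...}.
results = knowledge.query(
    ["What city does John live in?"],
    limit=3,
    preference="personal",
)
```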

View File

@@ -1,13 +1,13 @@
from abc import ABC, abstractmethod
from pathlib import Path
from typing import Dict, List, Union
from typing import Union, List, Dict, Any
from pydantic import Field
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from crewai.utilities.logger import Logger
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
from crewai.utilities.constants import KNOWLEDGE_DIRECTORY
from crewai.utilities.logger import Logger
class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
@@ -49,9 +49,10 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
color="red",
)
def _save_documents(self):
def save_documents(self, metadata: Dict[str, Any]):
"""Save the documents to the storage."""
self.storage.save(self.chunks)
chunk_metadatas = [metadata.copy() for _ in self.chunks]
self.storage.save(self.chunks, chunk_metadatas)
def convert_to_path(self, path: Union[Path, str]) -> Path:
"""Convert a path to a Path object."""

View File

@@ -1,5 +1,5 @@
from abc import ABC, abstractmethod
from typing import Any, Dict, List, Optional
from typing import List, Dict, Any, Optional
import numpy as np
from pydantic import BaseModel, ConfigDict, Field
@@ -17,7 +17,7 @@ class BaseKnowledgeSource(BaseModel, ABC):
model_config = ConfigDict(arbitrary_types_allowed=True)
storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
metadata: Dict[str, Any] = Field(default_factory=dict) # Currently unused
metadata: Dict[str, Any] = Field(default_factory=dict)
collection_name: Optional[str] = Field(default=None)
@abstractmethod
@@ -41,9 +41,9 @@ class BaseKnowledgeSource(BaseModel, ABC):
for i in range(0, len(text), self.chunk_size - self.chunk_overlap)
]
def _save_documents(self):
def save_documents(self, metadata: Dict[str, Any]):
"""
Save the documents to the storage.
This method should be called after the chunks and embeddings are generated.
"""
self.storage.save(self.chunks)
self.storage.save(self.chunks, metadata)

View File

@@ -1,6 +1,6 @@
import csv
from pathlib import Path
from typing import Dict, List
from pathlib import Path
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
@@ -30,7 +30,7 @@ class CSVKnowledgeSource(BaseFileKnowledgeSource):
)
new_chunks = self._chunk_text(content_str)
self.chunks.extend(new_chunks)
self._save_documents()
self.save_documents(metadata=self.metadata)
def _chunk_text(self, text: str) -> List[str]:
"""Utility method to split text into chunks."""

View File

@@ -1,6 +1,5 @@
from pathlib import Path
from typing import Dict, List
from pathlib import Path
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
@@ -45,7 +44,7 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
new_chunks = self._chunk_text(content_str)
self.chunks.extend(new_chunks)
self._save_documents()
self.save_documents(metadata=self.metadata)
def _chunk_text(self, text: str) -> List[str]:
"""Utility method to split text into chunks."""

View File

@@ -1,6 +1,6 @@
import json
from pathlib import Path
from typing import Any, Dict, List
from pathlib import Path
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
@@ -42,7 +42,7 @@ class JSONKnowledgeSource(BaseFileKnowledgeSource):
)
new_chunks = self._chunk_text(content_str)
self.chunks.extend(new_chunks)
self._save_documents()
self.save_documents(metadata=self.metadata)
def _chunk_text(self, text: str) -> List[str]:
"""Utility method to split text into chunks."""

View File

@@ -1,5 +1,5 @@
from typing import List, Dict
from pathlib import Path
from typing import Dict, List
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
@@ -43,7 +43,7 @@ class PDFKnowledgeSource(BaseFileKnowledgeSource):
for _, text in self.content.items():
new_chunks = self._chunk_text(text)
self.chunks.extend(new_chunks)
self._save_documents()
self.save_documents(metadata=self.metadata)
def _chunk_text(self, text: str) -> List[str]:
"""Utility method to split text into chunks."""

View File

@@ -24,7 +24,7 @@ class StringKnowledgeSource(BaseKnowledgeSource):
"""Add string content to the knowledge source, chunk it, compute embeddings, and save them."""
new_chunks = self._chunk_text(self.content)
self.chunks.extend(new_chunks)
self._save_documents()
self.save_documents(metadata=self.metadata)
def _chunk_text(self, text: str) -> List[str]:
"""Utility method to split text into chunks."""

View File

@@ -1,5 +1,5 @@
from pathlib import Path
from typing import Dict, List
from pathlib import Path
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
@@ -24,7 +24,7 @@ class TextFileKnowledgeSource(BaseFileKnowledgeSource):
for _, text in self.content.items():
new_chunks = self._chunk_text(text)
self.chunks.extend(new_chunks)
self._save_documents()
self.save_documents(metadata=self.metadata)
def _chunk_text(self, text: str) -> List[str]:
"""Utility method to split text into chunks."""

View File

@@ -1,20 +1,18 @@
import contextlib
import hashlib
import io
import logging
import os
from typing import Any, Dict, List, Optional, Union, cast
import chromadb
import chromadb.errors
from chromadb.api import ClientAPI
from chromadb.api.types import OneOrMany
from chromadb.config import Settings
import os
from crewai.knowledge.storage.base_knowledge_storage import BaseKnowledgeStorage
from crewai.utilities import EmbeddingConfigurator
from crewai.utilities.logger import Logger
import chromadb.errors
from crewai.utilities.paths import db_storage_path
from typing import Optional, List, Dict, Any, Union
from crewai.utilities import EmbeddingConfigurator
from crewai.knowledge.storage.base_knowledge_storage import BaseKnowledgeStorage
import hashlib
from chromadb.config import Settings
from chromadb.api import ClientAPI
from crewai.utilities.logger import Logger
@contextlib.contextmanager
@@ -118,16 +116,11 @@ class KnowledgeStorage(BaseKnowledgeStorage):
def save(
self,
documents: List[str],
metadata: Optional[Union[Dict[str, Any], List[Dict[str, Any]]]] = None,
metadata: Union[Dict[str, Any], List[Dict[str, Any]]],
):
if self.collection:
try:
if metadata is None:
metadatas: Optional[OneOrMany[chromadb.Metadata]] = None
elif isinstance(metadata, list):
metadatas = [cast(chromadb.Metadata, m) for m in metadata]
else:
metadatas = cast(chromadb.Metadata, metadata)
metadatas = [metadata] if isinstance(metadata, dict) else metadata
ids = [
hashlib.sha256(doc.encode("utf-8")).hexdigest() for doc in documents
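The tail of this hunk shows content-addressed IDs: each document is hashed, so re-saving an identical chunk yields the same ID rather than a duplicate record. A self-contained sketch of the idea:
```python
import hashlib

def content_id(doc: str) -> str:
    # Same text always yields the same ID, so re-saves deduplicate.
    return hashlib.sha256(doc.encode("utf-8")).hexdigest()

assert content_id("chunk one") == content_id("chunk one")
```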

View File

@@ -1,5 +1,4 @@
import logging
import os
import sys
import threading
import warnings
@@ -129,7 +128,6 @@ class LLM:
litellm.drop_params = True
litellm.set_verbose = False
self.set_callbacks(callbacks)
self.set_env_callbacks()
def call(self, messages: List[Dict[str, str]], callbacks: List[Any] = []) -> str:
with suppress_warnings():
@@ -204,39 +202,3 @@ class LLM:
litellm._async_success_callback.remove(callback)
litellm.callbacks = callbacks
def set_env_callbacks(self):
"""
Sets the success and failure callbacks for the LiteLLM library from environment variables.
This method reads the `LITELLM_SUCCESS_CALLBACKS` and `LITELLM_FAILURE_CALLBACKS`
environment variables, which should contain comma-separated lists of callback names.
It then assigns these lists to `litellm.success_callback` and `litellm.failure_callback`,
respectively.
If the environment variables are not set or are empty, the corresponding callback lists
will be set to empty lists.
Example:
LITELLM_SUCCESS_CALLBACKS="langfuse,langsmith"
LITELLM_FAILURE_CALLBACKS="langfuse"
This will set `litellm.success_callback` to ["langfuse", "langsmith"] and
`litellm.failure_callback` to ["langfuse"].
"""
success_callbacks_str = os.environ.get("LITELLM_SUCCESS_CALLBACKS", "")
success_callbacks = []
if success_callbacks_str:
success_callbacks = [
callback.strip() for callback in success_callbacks_str.split(",")
]
failure_callbacks_str = os.environ.get("LITELLM_FAILURE_CALLBACKS", "")
failure_callbacks = []
if failure_callbacks_str:
failure_callbacks = [
callback.strip() for callback in failure_callbacks_str.split(",")
]
litellm.success_callback = success_callbacks
litellm.failure_callback = failure_callbacks
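With `set_env_callbacks()` removed, nothing reads `LITELLM_SUCCESS_CALLBACKS` or `LITELLM_FAILURE_CALLBACKS` any more; callers relying on that wiring would need to set the LiteLLM lists themselves. A sketch using only the attributes the removed code touched:
```python
import litellm

# Replaces the removed env-var wiring by assigning the lists directly.
litellm.success_callback = ["langfuse", "langsmith"]
litellm.failure_callback = ["langfuse"]
```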

uv.lock (generated)
View File

@@ -714,7 +714,7 @@ requires-dist = [
[package.metadata.requires-dev]
dev = [
{ name = "cairosvg", specifier = ">=2.7.1" },
{ name = "crewai-tools", specifier = ">=0.14.0" },
{ name = "crewai-tools", specifier = ">=0.17.0,<0.18.0" },
{ name = "mkdocs", specifier = ">=1.4.3" },
{ name = "mkdocs-material", specifier = ">=9.5.7" },
{ name = "mkdocs-material-extensions", specifier = ">=1.3.1" },