mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-10 00:28:31 +00:00
WIP
This commit is contained in:
@@ -8,11 +8,12 @@ from pydantic import Field, InstanceOf, PrivateAttr, model_validator
|
|||||||
from crewai.agents import CacheHandler
|
from crewai.agents import CacheHandler
|
||||||
from crewai.agents.agent_builder.base_agent import BaseAgent
|
from crewai.agents.agent_builder.base_agent import BaseAgent
|
||||||
from crewai.agents.crew_agent_executor import CrewAgentExecutor
|
from crewai.agents.crew_agent_executor import CrewAgentExecutor
|
||||||
from crewai.knowledge import StringKnowledgeBase
|
|
||||||
|
# from crewai.knowledge import StringKnowledgeBase
|
||||||
from crewai.llm import LLM
|
from crewai.llm import LLM
|
||||||
from crewai.memory.contextual.contextual_memory import ContextualMemory
|
from crewai.memory.contextual.contextual_memory import ContextualMemory
|
||||||
from crewai.tools.agent_tools.agent_tools import AgentTools
|
|
||||||
from crewai.tools import BaseTool
|
from crewai.tools import BaseTool
|
||||||
|
from crewai.tools.agent_tools.agent_tools import AgentTools
|
||||||
from crewai.utilities import Converter, Prompts
|
from crewai.utilities import Converter, Prompts
|
||||||
from crewai.utilities.constants import TRAINED_AGENTS_DATA_FILE, TRAINING_DATA_FILE
|
from crewai.utilities.constants import TRAINED_AGENTS_DATA_FILE, TRAINING_DATA_FILE
|
||||||
from crewai.utilities.token_counter_callback import TokenCalcHandler
|
from crewai.utilities.token_counter_callback import TokenCalcHandler
|
||||||
@@ -188,7 +189,7 @@ class Agent(BaseAgent):
|
|||||||
if self.allow_code_execution:
|
if self.allow_code_execution:
|
||||||
self._validate_docker_installation()
|
self._validate_docker_installation()
|
||||||
|
|
||||||
self.knowledge = StringKnowledgeBase(content=self.knowledge)
|
# self.knowledge = StringKnowledgeBase(content=self.knowledge)
|
||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|||||||
55
src/crewai/knowledge/embedder/base_embedder.py
Normal file
55
src/crewai/knowledge/embedder/base_embedder.py
Normal file
@@ -0,0 +1,55 @@
|
|||||||
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import List
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
|
||||||
|
class BaseEmbedder(ABC):
    """
    Interface that every text embedding backend must implement
    """

    @abstractmethod
    def embed_chunks(self, chunks: List[str]) -> np.ndarray:
        """
        Embed a list of text chunks

        Args:
            chunks: The text chunks to turn into embeddings

        Returns:
            The embeddings as a numpy array
        """

    @abstractmethod
    def embed_texts(self, texts: List[str]) -> np.ndarray:
        """
        Embed a list of texts

        Args:
            texts: The texts to turn into embeddings

        Returns:
            The embeddings as a numpy array
        """

    @abstractmethod
    def embed_text(self, text: str) -> np.ndarray:
        """
        Embed one text

        Args:
            text: The text to turn into an embedding

        Returns:
            The embedding as a numpy array
        """

    @property
    @abstractmethod
    def dimension(self) -> int:
        """Dimension of the embeddings produced by this embedder"""
|
||||||
@@ -1,9 +1,12 @@
|
|||||||
from typing import List, Optional, Union
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
from typing import List, Optional, Union
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
|
from .base_embedder import BaseEmbedder
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from fastembed_gpu import TextEmbedding
|
from fastembed_gpu import TextEmbedding # type: ignore
|
||||||
|
|
||||||
FASTEMBED_AVAILABLE = True
|
FASTEMBED_AVAILABLE = True
|
||||||
except ImportError:
|
except ImportError:
|
||||||
@@ -15,7 +18,7 @@ except ImportError:
|
|||||||
FASTEMBED_AVAILABLE = False
|
FASTEMBED_AVAILABLE = False
|
||||||
|
|
||||||
|
|
||||||
class Embeddings:
|
class FastEmbed(BaseEmbedder):
|
||||||
"""
|
"""
|
||||||
A wrapper class for text embedding models using FastEmbed
|
A wrapper class for text embedding models using FastEmbed
|
||||||
"""
|
"""
|
||||||
@@ -44,6 +47,20 @@ class Embeddings:
|
|||||||
cache_dir=str(cache_dir) if cache_dir else None,
|
cache_dir=str(cache_dir) if cache_dir else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def embed_chunks(self, chunks: List[str]) -> np.ndarray:
    """
    Embed every chunk with the wrapped model and return the results as one array

    Args:
        chunks: The text chunks to embed

    Returns:
        Numpy array built from the embeddings, in the order the model yields them
    """
    # The model hands back a generator, so drain it into a list first;
    # numpy cannot build an array straight from a generator.
    return np.array([*self.model.embed(chunks)])
|
||||||
|
|
||||||
def embed_texts(self, texts: List[str]) -> np.ndarray:
|
def embed_texts(self, texts: List[str]) -> np.ndarray:
|
||||||
"""
|
"""
|
||||||
Generate embeddings for a list of texts
|
Generate embeddings for a list of texts
|
||||||
21
src/crewai/knowledge/knowledge.py
Normal file
21
src/crewai/knowledge/knowledge.py
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
from typing import List, Optional
|
||||||
|
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
from .embedder.base_embedder import BaseEmbedder
|
||||||
|
from .embedder.fastembed import FastEmbed
|
||||||
|
from .source.base_knowledge_source import BaseKnowledgeSource
|
||||||
|
|
||||||
|
|
||||||
|
class Knowledge(BaseModel):
    """
    Pairs a list of knowledge sources with the embedder used for them

    Args:
        sources: Knowledge sources to hold; defaults to an empty list
        embedder: Embedder to use; defaults to a new FastEmbed instance
    """

    # BaseEmbedder and BaseKnowledgeSource are plain ABCs rather than pydantic
    # models, so pydantic has to be told to accept them as field types
    # (it then validates them with an isinstance check).
    model_config = {"arbitrary_types_allowed": True}

    sources: Optional[List[BaseKnowledgeSource]] = None
    embedder: BaseEmbedder

    def __init__(
        self,
        sources: Optional[List[BaseKnowledgeSource]] = None,
        embedder: Optional[BaseEmbedder] = None,
    ):
        # `embedder` is a required field, so the defaults must be resolved
        # before validation runs; calling super().__init__() with no
        # arguments and assigning afterwards fails with a ValidationError.
        super().__init__(
            sources=sources if sources is not None else [],
            embedder=embedder if embedder is not None else FastEmbed(),
        )
|
||||||
@@ -1,23 +1,23 @@
|
|||||||
from typing import List, Any, Optional, Dict
|
|
||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
|
from typing import Any, Dict, List
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from .embeddings import Embeddings
|
|
||||||
|
from crewai.knowledge.embedder.base_embedder import BaseEmbedder
|
||||||
|
|
||||||
|
|
||||||
class BaseKnowledgeBase(ABC):
|
class BaseKnowledgeSource(ABC):
|
||||||
"""Abstract base class for knowledge bases"""
|
"""Abstract base class for knowledge bases"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
chunk_size: int = 1000,
|
chunk_size: int = 1000,
|
||||||
chunk_overlap: int = 200,
|
chunk_overlap: int = 200,
|
||||||
embeddings_class: Optional[Embeddings] = None,
|
|
||||||
):
|
):
|
||||||
self.chunk_size = chunk_size
|
self.chunk_size = chunk_size
|
||||||
self.chunk_overlap = chunk_overlap
|
self.chunk_overlap = chunk_overlap
|
||||||
self.chunks: List[str] = []
|
self.chunks: List[str] = []
|
||||||
self.chunk_embeddings: Dict[int, np.ndarray] = {}
|
self.chunk_embeddings: Dict[int, np.ndarray] = {}
|
||||||
self.embeddings_class = embeddings_class or Embeddings()
|
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def query(self, query: str) -> str:
|
def query(self, query: str) -> str:
|
||||||
@@ -29,18 +29,13 @@ class BaseKnowledgeBase(ABC):
|
|||||||
"""Process and store content in the knowledge base"""
|
"""Process and store content in the knowledge base"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def reset(self) -> None:
|
def embed(self, embedder: BaseEmbedder, new_chunks: List[str]) -> None:
|
||||||
"""Reset the knowledge base"""
|
|
||||||
self.chunks = []
|
|
||||||
self.chunk_embeddings = {}
|
|
||||||
|
|
||||||
def _embed_chunks(self, new_chunks: List[str]) -> None:
|
|
||||||
"""Embed chunks and store them"""
|
"""Embed chunks and store them"""
|
||||||
if not new_chunks:
|
if not new_chunks:
|
||||||
return
|
return
|
||||||
|
|
||||||
# Get embeddings for new chunks
|
# Get embeddings for new chunks
|
||||||
embeddings = self.embeddings_class.embed_texts(new_chunks)
|
embeddings = embedder.embed_texts(new_chunks)
|
||||||
|
|
||||||
# Store embeddings with their corresponding chunks
|
# Store embeddings with their corresponding chunks
|
||||||
start_idx = len(self.chunks)
|
start_idx = len(self.chunks)
|
||||||
@@ -92,13 +87,15 @@ class BaseKnowledgeBase(ABC):
|
|||||||
|
|
||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
def _find_similar_chunks(self, query: str, top_k: int = 3) -> List[str]:
|
def _find_similar_chunks(
|
||||||
|
self, embedder: BaseEmbedder, query: str, top_k: int = 3
|
||||||
|
) -> List[str]:
|
||||||
"""Find the most similar chunks to a query using embeddings"""
|
"""Find the most similar chunks to a query using embeddings"""
|
||||||
if not self.chunks:
|
if not self.chunks:
|
||||||
return []
|
return []
|
||||||
|
|
||||||
# Get query embedding
|
# Get query embedding
|
||||||
query_embedding = self.embeddings_class.embed_text(query)
|
query_embedding = embedder.embed_text(query)
|
||||||
|
|
||||||
# Calculate similarities with all chunks
|
# Calculate similarities with all chunks
|
||||||
similarities = []
|
similarities = []
|
||||||
@@ -1,25 +1,24 @@
|
|||||||
from typing import Optional
|
from crewai.knowledge.embedder.base_embedder import BaseEmbedder
|
||||||
from crewai.knowledge.base_knowledge import BaseKnowledgeBase
|
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||||
from crewai.knowledge.embeddings import Embeddings
|
|
||||||
|
|
||||||
|
|
||||||
class StringKnowledgeBase(BaseKnowledgeBase):
|
class StringKnowledgeSource(BaseKnowledgeSource):
|
||||||
"""A knowledge base that stores and queries plain text content using embeddings"""
|
"""A knowledge base that stores and queries plain text content using embeddings"""
|
||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
|
content: str,
|
||||||
chunk_size: int = 1000,
|
chunk_size: int = 1000,
|
||||||
chunk_overlap: int = 200,
|
chunk_overlap: int = 200,
|
||||||
embeddings_class: Optional[Embeddings] = None,
|
|
||||||
content: Optional[str] = None,
|
|
||||||
):
|
):
|
||||||
super().__init__(chunk_size, chunk_overlap, embeddings_class)
|
super().__init__(
|
||||||
if content:
|
chunk_size,
|
||||||
self.add(content)
|
chunk_overlap,
|
||||||
|
)
|
||||||
|
|
||||||
def add(self, content: str) -> None:
|
def add(self, embedder: BaseEmbedder) -> None:
|
||||||
"""Add text content to the knowledge base, chunk it, and compute embeddings"""
|
"""Add text content to the knowledge base, chunk it, and compute embeddings"""
|
||||||
if not isinstance(content, str):
|
if not isinstance(self.content, str):
|
||||||
raise ValueError("StringKnowledgeBase only accepts string content")
|
raise ValueError("StringKnowledgeBase only accepts string content")
|
||||||
|
|
||||||
# Create chunks from the text
|
# Create chunks from the text
|
||||||
@@ -29,12 +28,12 @@ class StringKnowledgeBase(BaseKnowledgeBase):
|
|||||||
self.chunks.extend(new_chunks)
|
self.chunks.extend(new_chunks)
|
||||||
|
|
||||||
# Compute and store embeddings for the new chunks
|
# Compute and store embeddings for the new chunks
|
||||||
self._embed_chunks(new_chunks)
|
embedder.embed_chunks(new_chunks)
|
||||||
|
|
||||||
def query(self, query: str, top_k: int = 3) -> str:
|
def query(self, embedder: BaseEmbedder, query: str, top_k: int = 3) -> str:
|
||||||
"""
|
"""
|
||||||
Query the knowledge base using semantic search
|
Query the knowledge base using semantic search
|
||||||
Returns the most relevant chunk based on embedding similarity
|
Returns the most relevant chunk based on embedding similarity
|
||||||
"""
|
"""
|
||||||
similar_chunks = self._find_similar_chunks(query, top_k=top_k)
|
similar_chunks = self._find_similar_chunks(embedder, query, top_k=top_k)
|
||||||
return similar_chunks[0] if similar_chunks else ""
|
return similar_chunks[0] if similar_chunks else ""
|
||||||
39
src/crewai/knowledge/source/text_file_knowledge_source.py
Normal file
39
src/crewai/knowledge/source/text_file_knowledge_source.py
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
from crewai.knowledge.embedder.base_embedder import BaseEmbedder
|
||||||
|
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||||
|
|
||||||
|
|
||||||
|
class TextFileKnowledgeSource(BaseKnowledgeSource):
    """A knowledge source that loads plain text from a file and queries it using embeddings"""

    def __init__(
        self,
        file_path: str,
        chunk_size: int = 1000,
        chunk_overlap: int = 200,
    ):
        """
        Args:
            file_path: Path of the text file whose content backs this source
            chunk_size: Forwarded to the base class
            chunk_overlap: Forwarded to the base class

        Raises:
            OSError: If the file cannot be opened or read
        """
        super().__init__(
            chunk_size,
            chunk_overlap,
        )
        # Keep the path and load the text up front: add() reads self.content,
        # which was previously never assigned.
        self.file_path = file_path
        self.content = self._load_content()

    def _load_content(self) -> str:
        """Read and return the whole file at self.file_path as UTF-8 text"""
        with open(self.file_path, encoding="utf-8") as text_file:
            return text_file.read()

    def add(self, embedder: BaseEmbedder) -> None:
        """Chunk the loaded file content, store the chunks, and compute their embeddings"""
        if not isinstance(self.content, str):
            raise ValueError("TextFileKnowledgeSource only accepts string content")

        # Create chunks from the text
        new_chunks = self._chunk_text(self.content)

        # Add chunks to the knowledge base
        self.chunks.extend(new_chunks)

        # Compute and store embeddings for the new chunks. Go through the
        # base-class embed() so the vectors are kept; calling
        # embedder.embed_chunks() directly threw its return value away.
        # NOTE(review): extend-then-embed mirrors the order the pre-refactor
        # add() used; confirm against the index bookkeeping inside embed().
        self.embed(embedder, new_chunks)

    def query(self, embedder: BaseEmbedder, query: str, top_k: int = 3) -> str:
        """
        Query the knowledge base using semantic search
        Returns the most relevant chunk based on embedding similarity,
        or an empty string when no chunk is found
        """
        similar_chunks = self._find_similar_chunks(embedder, query, top_k=top_k)
        return similar_chunks[0] if similar_chunks else ""
|
||||||
Reference in New Issue
Block a user