mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-10 00:28:31 +00:00
cleanup rm unused embedder
This commit is contained in:
@@ -1,8 +1,6 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import ABC, abstractmethod
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from crewai.knowledge.embedder.base_embedder import BaseEmbedder
|
|
||||||
|
|
||||||
|
|
||||||
class BaseKnowledgeSource(ABC):
|
class BaseKnowledgeSource(ABC):
|
||||||
"""Abstract base class for different types of knowledge sources."""
|
"""Abstract base class for different types of knowledge sources."""
|
||||||
@@ -22,11 +20,11 @@ class BaseKnowledgeSource(ABC):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def add(self, embedder: BaseEmbedder) -> None:
|
def add(self) -> None:
|
||||||
"""Add content to the knowledge base, chunk it, and compute embeddings."""
|
"""Add content to the knowledge base, chunk it, and compute embeddings."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def query(self, embedder: BaseEmbedder, query: str, top_k: int = 3) -> str:
|
def query(self, query: str, top_k: int = 3) -> str:
|
||||||
"""Query the knowledge base using semantic search."""
|
"""Query the knowledge base using semantic search."""
|
||||||
pass
|
pass
|
||||||
|
|||||||
@@ -1,27 +1,35 @@
|
|||||||
from typing import List, Optional, Dict, Any
|
import os
|
||||||
|
|
||||||
|
from typing import List, Optional, Dict, Any
|
||||||
from pydantic import BaseModel, ConfigDict, Field
|
from pydantic import BaseModel, ConfigDict, Field
|
||||||
|
|
||||||
from crewai.knowledge.embedder.base_embedder import BaseEmbedder
|
|
||||||
from crewai.knowledge.embedder.fastembed import FastEmbed
|
|
||||||
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||||
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
|
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
|
||||||
|
from crewai.utilities.logger import Logger
|
||||||
|
|
||||||
|
os.environ["TOKENIZERS_PARALLELISM"] = "false" # removes logging from fastembed
|
||||||
|
|
||||||
|
|
||||||
class Knowledge(BaseModel):
|
class Knowledge(BaseModel):
|
||||||
sources: List[BaseKnowledgeSource] = Field(default_factory=list)
|
sources: List[BaseKnowledgeSource] = Field(default_factory=list)
|
||||||
embedder: BaseEmbedder = Field(default_factory=FastEmbed)
|
|
||||||
|
|
||||||
model_config = ConfigDict(arbitrary_types_allowed=True)
|
model_config = ConfigDict(arbitrary_types_allowed=True)
|
||||||
agents: List[str] = Field(default_factory=list)
|
|
||||||
storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
|
storage: KnowledgeStorage = Field(default_factory=KnowledgeStorage)
|
||||||
embedder_config: Optional[Dict[str, Any]] = Field(default_factory=None)
|
embedder_config: Optional[Dict[str, Any]] = None
|
||||||
|
|
||||||
def __init__(self, **data):
|
def __init__(self, embedder_config: Optional[Dict[str, Any]] = None, **data):
|
||||||
super().__init__(**data)
|
super().__init__(**data)
|
||||||
embedder_config = data.get("embedder_config", None)
|
|
||||||
if embedder_config:
|
if embedder_config:
|
||||||
self.storage = KnowledgeStorage(embedder_config=embedder_config)
|
self.storage = KnowledgeStorage(embedder_config=embedder_config)
|
||||||
|
else:
|
||||||
|
self.storage = KnowledgeStorage()
|
||||||
|
|
||||||
|
try:
|
||||||
|
for source in self.sources:
|
||||||
|
source.add()
|
||||||
|
except Exception as e:
|
||||||
|
Logger.log(
|
||||||
|
"warning", f"Failed to add some sources during initialization: {e}"
|
||||||
|
)
|
||||||
|
|
||||||
def query(
|
def query(
|
||||||
self, query: List[str], limit: int = 3, preference: Optional[str] = None
|
self, query: List[str], limit: int = 3, preference: Optional[str] = None
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ from typing import List
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from pydantic import BaseModel, ConfigDict, Field
|
from pydantic import BaseModel, ConfigDict, Field
|
||||||
|
|
||||||
from crewai.knowledge.embedder.base_embedder import BaseEmbedder
|
|
||||||
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
|
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
|
||||||
from typing import Dict, Any
|
from typing import Dict, Any
|
||||||
|
|
||||||
@@ -27,7 +26,7 @@ class BaseKnowledgeSource(BaseModel, ABC):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def add(self, embedder: BaseEmbedder) -> None:
|
def add(self) -> None:
|
||||||
"""Process content, chunk it, compute embeddings, and save them."""
|
"""Process content, chunk it, compute embeddings, and save them."""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
import csv
|
import csv
|
||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from crewai.knowledge.embedder.base_embedder import BaseEmbedder
|
|
||||||
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
|
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
|
||||||
|
|
||||||
|
|
||||||
@@ -18,17 +17,13 @@ class CSVKnowledgeSource(BaseFileKnowledgeSource):
|
|||||||
content += " ".join(row) + "\n"
|
content += " ".join(row) + "\n"
|
||||||
return content
|
return content
|
||||||
|
|
||||||
def add(self, embedder: BaseEmbedder) -> None:
|
def add(self) -> None:
|
||||||
"""
|
"""
|
||||||
Add CSV file content to the knowledge source, chunk it, compute embeddings,
|
Add CSV file content to the knowledge source, chunk it, compute embeddings,
|
||||||
and save the embeddings.
|
and save the embeddings.
|
||||||
"""
|
"""
|
||||||
new_chunks = self._chunk_text(self.content)
|
new_chunks = self._chunk_text(self.content)
|
||||||
self.chunks.extend(new_chunks)
|
self.chunks.extend(new_chunks)
|
||||||
# Compute embeddings for the new chunks
|
|
||||||
new_embeddings = embedder.embed_chunks(new_chunks)
|
|
||||||
# Save the embeddings
|
|
||||||
self.chunk_embeddings.extend(new_embeddings)
|
|
||||||
self.save_documents(metadata=self.metadata)
|
self.save_documents(metadata=self.metadata)
|
||||||
|
|
||||||
def _chunk_text(self, text: str) -> List[str]:
|
def _chunk_text(self, text: str) -> List[str]:
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
from typing import List
|
from typing import List
|
||||||
|
|
||||||
from crewai.knowledge.embedder.base_embedder import BaseEmbedder
|
|
||||||
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
|
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
|
||||||
|
|
||||||
|
|
||||||
@@ -28,17 +27,13 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource):
|
|||||||
f"{missing_package} is not installed. Please install it with: pip install {missing_package}"
|
f"{missing_package} is not installed. Please install it with: pip install {missing_package}"
|
||||||
)
|
)
|
||||||
|
|
||||||
def add(self, embedder: BaseEmbedder) -> None:
|
def add(self) -> None:
|
||||||
"""
|
"""
|
||||||
Add Excel file content to the knowledge source, chunk it, compute embeddings,
|
Add Excel file content to the knowledge source, chunk it, compute embeddings,
|
||||||
and save the embeddings.
|
and save the embeddings.
|
||||||
"""
|
"""
|
||||||
new_chunks = self._chunk_text(self.content)
|
new_chunks = self._chunk_text(self.content)
|
||||||
self.chunks.extend(new_chunks)
|
self.chunks.extend(new_chunks)
|
||||||
# Compute embeddings for the new chunks
|
|
||||||
new_embeddings = embedder.embed_chunks(new_chunks)
|
|
||||||
# Save the embeddings
|
|
||||||
self.chunk_embeddings.extend(new_embeddings)
|
|
||||||
self.save_documents(metadata=self.metadata)
|
self.save_documents(metadata=self.metadata)
|
||||||
|
|
||||||
def _chunk_text(self, text: str) -> List[str]:
|
def _chunk_text(self, text: str) -> List[str]:
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
import json
|
import json
|
||||||
from typing import Any, List
|
from typing import Any, List
|
||||||
|
|
||||||
from crewai.knowledge.embedder.base_embedder import BaseEmbedder
|
|
||||||
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
|
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
|
||||||
|
|
||||||
|
|
||||||
@@ -30,17 +29,13 @@ class JSONKnowledgeSource(BaseFileKnowledgeSource):
|
|||||||
text += f"{str(data)}"
|
text += f"{str(data)}"
|
||||||
return text
|
return text
|
||||||
|
|
||||||
def add(self, embedder: BaseEmbedder) -> None:
|
def add(self) -> None:
|
||||||
"""
|
"""
|
||||||
Add JSON file content to the knowledge source, chunk it, compute embeddings,
|
Add JSON file content to the knowledge source, chunk it, compute embeddings,
|
||||||
and save the embeddings.
|
and save the embeddings.
|
||||||
"""
|
"""
|
||||||
new_chunks = self._chunk_text(self.content)
|
new_chunks = self._chunk_text(self.content)
|
||||||
self.chunks.extend(new_chunks)
|
self.chunks.extend(new_chunks)
|
||||||
# Compute embeddings for the new chunks
|
|
||||||
new_embeddings = embedder.embed_chunks(new_chunks)
|
|
||||||
# Save the embeddings
|
|
||||||
self.chunk_embeddings.extend(new_embeddings)
|
|
||||||
self.save_documents(metadata=self.metadata)
|
self.save_documents(metadata=self.metadata)
|
||||||
|
|
||||||
def _chunk_text(self, text: str) -> List[str]:
|
def _chunk_text(self, text: str) -> List[str]:
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
from typing import List, Dict
|
from typing import List, Dict
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from crewai.knowledge.embedder.base_embedder import BaseEmbedder
|
|
||||||
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
|
from crewai.knowledge.source.base_file_knowledge_source import BaseFileKnowledgeSource
|
||||||
|
|
||||||
|
|
||||||
@@ -37,7 +36,7 @@ class PDFKnowledgeSource(BaseFileKnowledgeSource):
|
|||||||
"pdfplumber is not installed. Please install it with: pip install pdfplumber"
|
"pdfplumber is not installed. Please install it with: pip install pdfplumber"
|
||||||
)
|
)
|
||||||
|
|
||||||
def add(self, embedder: BaseEmbedder) -> None:
|
def add(self) -> None:
|
||||||
"""
|
"""
|
||||||
Add PDF file content to the knowledge source, chunk it, compute embeddings,
|
Add PDF file content to the knowledge source, chunk it, compute embeddings,
|
||||||
and save the embeddings.
|
and save the embeddings.
|
||||||
@@ -45,10 +44,6 @@ class PDFKnowledgeSource(BaseFileKnowledgeSource):
|
|||||||
for _, text in self.content.items():
|
for _, text in self.content.items():
|
||||||
new_chunks = self._chunk_text(text)
|
new_chunks = self._chunk_text(text)
|
||||||
self.chunks.extend(new_chunks)
|
self.chunks.extend(new_chunks)
|
||||||
# Compute embeddings for the new chunks
|
|
||||||
new_embeddings = embedder.embed_chunks(new_chunks)
|
|
||||||
# Save the embeddings
|
|
||||||
self.chunk_embeddings.extend(new_embeddings)
|
|
||||||
self.save_documents(metadata=self.metadata)
|
self.save_documents(metadata=self.metadata)
|
||||||
|
|
||||||
def _chunk_text(self, text: str) -> List[str]:
|
def _chunk_text(self, text: str) -> List[str]:
|
||||||
|
|||||||
@@ -36,7 +36,7 @@ class KnowledgeStorage(BaseKnowledgeStorage):
|
|||||||
|
|
||||||
collection: Optional[chromadb.Collection] = None
|
collection: Optional[chromadb.Collection] = None
|
||||||
|
|
||||||
def __init__(self, embedder_config=None):
|
def __init__(self, embedder_config: Optional[Dict[str, Any]] = None):
|
||||||
self._initialize_app(embedder_config or {})
|
self._initialize_app(embedder_config or {})
|
||||||
|
|
||||||
def search(
|
def search(
|
||||||
|
|||||||
Reference in New Issue
Block a user