From aab05388d5bcf315f31c01e9ce4c7995919ca23a Mon Sep 17 00:00:00 2001 From: Lorenze Jay Date: Tue, 3 Dec 2024 10:12:37 -0800 Subject: [PATCH] fixed types --- docs/concepts/knowledge.mdx | 29 ++++++++++++++++--- .../source/base_file_knowledge_source.py | 3 ++ .../source/excel_knowledge_source.py | 4 +-- .../knowledge/source/json_knowledge_source.py | 1 + .../source/text_file_knowledge_source.py | 1 + 5 files changed, 32 insertions(+), 6 deletions(-) diff --git a/docs/concepts/knowledge.mdx b/docs/concepts/knowledge.mdx index 69fa4e644..a00b2c2f0 100644 --- a/docs/concepts/knowledge.mdx +++ b/docs/concepts/knowledge.mdx @@ -156,14 +156,35 @@ crew = Crew( agents=[agent], tasks=[task], knowledge_sources=[source], - embedder_config={ - "model": "BAAI/bge-small-en-v1.5", - "normalize": True, - "max_length": 512 + embedder={ + "provider": "ollama", + "config": {"model": "nomic-embed-text:latest"}, } ) ``` +### Referencing Sources + +You can reference knowledge sources by their collection name or metadata. + +* Add a directory called `knowledge` to your crew project. +* Files in that directory can be referenced by paths relative to the `knowledge` directory. + +Example: +A file inside the `knowledge` directory called `example.txt` can be referenced as `example.txt`. 
+ +```python +source = TextFileKnowledgeSource( + file_path="example.txt", # or /example.txt + collection_name="example" +) +crew = Crew( + agents=[agent], + tasks=[task], + knowledge_sources=[source], +) +``` + ## Best Practices diff --git a/src/crewai/knowledge/source/base_file_knowledge_source.py b/src/crewai/knowledge/source/base_file_knowledge_source.py index ad160683d..ebb3341a8 100644 --- a/src/crewai/knowledge/source/base_file_knowledge_source.py +++ b/src/crewai/knowledge/source/base_file_knowledge_source.py @@ -37,6 +37,9 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource): if not isinstance(paths, list): raise ValueError("file_path must be a Path or a list of Paths") + # Ensure all paths are Path objects + paths = [Path(path) if isinstance(path, str) else path for path in paths] + for path in paths: if not path.exists(): self._logger.log( diff --git a/src/crewai/knowledge/source/excel_knowledge_source.py b/src/crewai/knowledge/source/excel_knowledge_source.py index 3b5c71514..50520b668 100644 --- a/src/crewai/knowledge/source/excel_knowledge_source.py +++ b/src/crewai/knowledge/source/excel_knowledge_source.py @@ -12,9 +12,9 @@ class ExcelKnowledgeSource(BaseFileKnowledgeSource): pd = self._import_dependencies() if isinstance(self.file_path, list): - file_path = self.file_path[0] + file_path = self.convert_to_path(self.file_path[0]) else: - file_path = self.file_path + file_path = self.convert_to_path(self.file_path) df = pd.read_excel(file_path) content = df.to_csv(index=False) diff --git a/src/crewai/knowledge/source/json_knowledge_source.py b/src/crewai/knowledge/source/json_knowledge_source.py index 490423a00..e899bb338 100644 --- a/src/crewai/knowledge/source/json_knowledge_source.py +++ b/src/crewai/knowledge/source/json_knowledge_source.py @@ -15,6 +15,7 @@ class JSONKnowledgeSource(BaseFileKnowledgeSource): content: Dict[Path, str] = {} for path in paths: + path = self.convert_to_path(path) with open(path, "r", encoding="utf-8") as json_file: 
data = json.load(json_file) content[path] = self._json_to_text(data) diff --git a/src/crewai/knowledge/source/text_file_knowledge_source.py b/src/crewai/knowledge/source/text_file_knowledge_source.py index cbb1aedf5..3078ce05b 100644 --- a/src/crewai/knowledge/source/text_file_knowledge_source.py +++ b/src/crewai/knowledge/source/text_file_knowledge_source.py @@ -13,6 +13,7 @@ class TextFileKnowledgeSource(BaseFileKnowledgeSource): paths = [self.file_path] if isinstance(self.file_path, Path) else self.file_path content = {} for path in paths: + path = Path(path) with open(path, "r", encoding="utf-8") as f: content[path] = f.read() return content