From 101cee8a279802b24603a371f2b9df4fa35102a0 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Tue, 16 Sep 2025 00:11:41 +0000
Subject: [PATCH] feat: Add support for external knowledge directory via CREWAI_KNOWLEDGE_FILE_DIR
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add get_knowledge_directory() utility function following CREWAI_STORAGE_DIR pattern
- Update BaseFileKnowledgeSource, CrewDoclingSource, and ExcelKnowledgeSource to use new function
- Add comprehensive tests for utility function and knowledge source integration
- Maintain backward compatibility with default 'knowledge' directory
- Add proper error handling for non-existent directories
- Join file paths onto the knowledge directory with pathlib so that absolute
  file paths are used as given instead of being nested under the directory
- Reject a CREWAI_KNOWLEDGE_FILE_DIR value that exists but is not a directory
- Compare resolved paths in the relative-directory test, because
  get_knowledge_directory() returns the configured value without resolving it

Fixes #3519

Co-Authored-By: João
---
 .../source/base_file_knowledge_source.py      |  3 +-
 .../knowledge/source/crew_docling_source.py   |  3 +-
 .../source/excel_knowledge_source.py          |  3 +-
 src/crewai/utilities/paths.py                 | 15 ++-
 .../test_external_knowledge_directory.py      | 99 +++++++++++++++++++
 tests/utilities/test_knowledge_directory.py   | 63 ++++++++++++
 6 files changed, 182 insertions(+), 4 deletions(-)
 create mode 100644 tests/knowledge/test_external_knowledge_directory.py
 create mode 100644 tests/utilities/test_knowledge_directory.py

diff --git a/src/crewai/knowledge/source/base_file_knowledge_source.py b/src/crewai/knowledge/source/base_file_knowledge_source.py
index 4c4b9b337..7a82a1108 100644
--- a/src/crewai/knowledge/source/base_file_knowledge_source.py
+++ b/src/crewai/knowledge/source/base_file_knowledge_source.py
@@ -8,6 +8,7 @@ from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
 from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
 from crewai.utilities.constants import KNOWLEDGE_DIRECTORY
 from crewai.utilities.logger import Logger
+from crewai.utilities.paths import get_knowledge_directory
 
 
 class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
@@ -76,7 +77,7 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
 
     def convert_to_path(self, path: Union[Path, str]) -> Path:
         """Convert a path to a Path object."""
-        return Path(KNOWLEDGE_DIRECTORY + "/" + path) if isinstance(path, str) else path
+        return Path(get_knowledge_directory()) / path if isinstance(path, str) else path
 
     def _process_file_paths(self) -> List[Path]:
         """Convert file_path to a list of Path objects."""
diff --git a/src/crewai/knowledge/source/crew_docling_source.py b/src/crewai/knowledge/source/crew_docling_source.py
index 6ca0ae967..db70307a3 100644
--- a/src/crewai/knowledge/source/crew_docling_source.py
+++ b/src/crewai/knowledge/source/crew_docling_source.py
@@ -18,6 +18,7 @@ from pydantic import Field
 from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
 from crewai.utilities.constants import KNOWLEDGE_DIRECTORY
 from crewai.utilities.logger import Logger
+from crewai.utilities.paths import get_knowledge_directory
 
 
 class CrewDoclingSource(BaseKnowledgeSource):
@@ -110,7 +111,7 @@ class CrewDoclingSource(BaseKnowledgeSource):
                     except Exception as e:
                         raise ValueError(f"Invalid URL: {path}. Error: {str(e)}")
                 else:
-                    local_path = Path(KNOWLEDGE_DIRECTORY + "/" + path)
+                    local_path = Path(get_knowledge_directory()) / path
                     if local_path.exists():
                         processed_paths.append(local_path)
                     else:
diff --git a/src/crewai/knowledge/source/excel_knowledge_source.py b/src/crewai/knowledge/source/excel_knowledge_source.py
index a73afb1df..201f8f61c 100644
--- a/src/crewai/knowledge/source/excel_knowledge_source.py
+++ b/src/crewai/knowledge/source/excel_knowledge_source.py
@@ -7,6 +7,7 @@ from pydantic import Field, field_validator
 from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
 from crewai.utilities.constants import KNOWLEDGE_DIRECTORY
 from crewai.utilities.logger import Logger
+from crewai.utilities.paths import get_knowledge_directory
 
 
 class ExcelKnowledgeSource(BaseKnowledgeSource):
@@ -128,7 +129,7 @@ class ExcelKnowledgeSource(BaseKnowledgeSource):
 
     def convert_to_path(self, path: Union[Path, str]) -> Path:
         """Convert a path to a Path object."""
-        return Path(KNOWLEDGE_DIRECTORY + "/" + path) if isinstance(path, str) else path
+        return Path(get_knowledge_directory()) / path if isinstance(path, str) else path
 
     def _import_dependencies(self):
         """Dynamically import dependencies."""
diff --git a/src/crewai/utilities/paths.py b/src/crewai/utilities/paths.py
index 853c612c3..d780fa65a 100644
--- a/src/crewai/utilities/paths.py
+++ b/src/crewai/utilities/paths.py
@@ -28,4 +28,17 @@ def get_project_directory_name():
     else:
         cwd = Path.cwd()
         project_directory_name = cwd.name
-        return project_directory_name
\ No newline at end of file
+        return project_directory_name
+
+
+def get_knowledge_directory():
+    """Return CREWAI_KNOWLEDGE_FILE_DIR when set (ValueError unless it is a directory), else 'knowledge'."""
+    knowledge_dir = os.environ.get("CREWAI_KNOWLEDGE_FILE_DIR")
+
+    if knowledge_dir:
+        knowledge_path = Path(knowledge_dir)
+        if not knowledge_path.is_dir():
+            raise ValueError(f"Knowledge directory does not exist or is not a directory: {knowledge_dir}")
+        return str(knowledge_path)
+    else:
+        return "knowledge"
diff --git a/tests/knowledge/test_external_knowledge_directory.py b/tests/knowledge/test_external_knowledge_directory.py
new file mode 100644
index 000000000..50807468c
--- /dev/null
+++ b/tests/knowledge/test_external_knowledge_directory.py
@@ -0,0 +1,99 @@
+import os
+import tempfile
+from pathlib import Path
+import pytest
+from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource
+from crewai.knowledge.source.json_knowledge_source import JSONKnowledgeSource
+
+
+class TestExternalKnowledgeDirectory:
+    def test_text_file_source_with_external_directory(self):
+        """Test that TextFileKnowledgeSource works with external directory."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            test_file = Path(temp_dir) / "test.txt"
+            test_content = "This is a test file for external knowledge directory."
+            test_file.write_text(test_content)
+
+            os.environ["CREWAI_KNOWLEDGE_FILE_DIR"] = temp_dir
+            try:
+                source = TextFileKnowledgeSource(file_paths=["test.txt"])
+
+                assert len(source.content) == 1
+                loaded_content = list(source.content.values())[0]
+                assert loaded_content == test_content
+
+            finally:
+                del os.environ["CREWAI_KNOWLEDGE_FILE_DIR"]
+
+    def test_json_file_source_with_external_directory(self):
+        """Test that JSONKnowledgeSource works with external directory."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            test_file = Path(temp_dir) / "test.json"
+            test_data = {"name": "John", "age": 30, "city": "New York"}
+            import json
+            test_file.write_text(json.dumps(test_data))
+
+            os.environ["CREWAI_KNOWLEDGE_FILE_DIR"] = temp_dir
+            try:
+                source = JSONKnowledgeSource(file_paths=["test.json"])
+
+                assert len(source.content) == 1
+                loaded_content = list(source.content.values())[0]
+                assert "John" in loaded_content
+                assert "30" in loaded_content
+                assert "New York" in loaded_content
+
+            finally:
+                del os.environ["CREWAI_KNOWLEDGE_FILE_DIR"]
+
+    def test_knowledge_source_fallback_to_default(self):
+        """Test that knowledge sources fall back to default directory when env var not set."""
+        if "CREWAI_KNOWLEDGE_FILE_DIR" in os.environ:
+            del os.environ["CREWAI_KNOWLEDGE_FILE_DIR"]
+
+        knowledge_dir = Path("knowledge")
+        knowledge_dir.mkdir(exist_ok=True)
+        test_file = knowledge_dir / "test_fallback.txt"
+        test_content = "This is a test file for default knowledge directory."
+
+        try:
+            test_file.write_text(test_content)
+
+            source = TextFileKnowledgeSource(file_paths=["test_fallback.txt"])
+
+            assert len(source.content) == 1
+            loaded_content = list(source.content.values())[0]
+            assert loaded_content == test_content
+
+        finally:
+            if test_file.exists():
+                test_file.unlink()
+
+    def test_knowledge_source_with_absolute_path_ignores_env_var(self):
+        """Test that absolute paths ignore the environment variable."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            test_file = Path(temp_dir) / "test_absolute.txt"
+            test_content = "This is a test file with absolute path."
+            test_file.write_text(test_content)
+
+            with tempfile.TemporaryDirectory() as other_dir:
+                os.environ["CREWAI_KNOWLEDGE_FILE_DIR"] = other_dir
+                try:
+                    source = TextFileKnowledgeSource(file_paths=[str(test_file)])
+
+                    assert len(source.content) == 1
+                    loaded_content = list(source.content.values())[0]
+                    assert loaded_content == test_content
+
+                finally:
+                    del os.environ["CREWAI_KNOWLEDGE_FILE_DIR"]
+
+    def test_knowledge_source_error_with_invalid_external_directory(self):
+        """Test that proper error is raised when external directory doesn't exist."""
+        invalid_dir = "/path/that/does/not/exist"
+        os.environ["CREWAI_KNOWLEDGE_FILE_DIR"] = invalid_dir
+        try:
+            with pytest.raises(ValueError, match="Knowledge directory does not exist"):
+                TextFileKnowledgeSource(file_paths=["test.txt"])
+        finally:
+            del os.environ["CREWAI_KNOWLEDGE_FILE_DIR"]
diff --git a/tests/utilities/test_knowledge_directory.py b/tests/utilities/test_knowledge_directory.py
new file mode 100644
index 000000000..7bbe1e973
--- /dev/null
+++ b/tests/utilities/test_knowledge_directory.py
@@ -0,0 +1,63 @@
+import os
+import tempfile
+from pathlib import Path
+import pytest
+from crewai.utilities.paths import get_knowledge_directory
+
+
+class TestKnowledgeDirectory:
+    def test_default_knowledge_directory(self):
+        """Test that default knowledge directory is returned when env var not set."""
+        if "CREWAI_KNOWLEDGE_FILE_DIR" in os.environ:
+            del os.environ["CREWAI_KNOWLEDGE_FILE_DIR"]
+
+        result = get_knowledge_directory()
+        assert result == "knowledge"
+
+    def test_custom_knowledge_directory(self):
+        """Test that custom directory is returned when env var is set."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            os.environ["CREWAI_KNOWLEDGE_FILE_DIR"] = temp_dir
+            try:
+                result = get_knowledge_directory()
+                assert result == temp_dir
+            finally:
+                del os.environ["CREWAI_KNOWLEDGE_FILE_DIR"]
+
+    def test_invalid_knowledge_directory(self):
+        """Test that ValueError is raised for non-existent directory."""
+        invalid_dir = "/path/that/does/not/exist"
+        os.environ["CREWAI_KNOWLEDGE_FILE_DIR"] = invalid_dir
+        try:
+            with pytest.raises(ValueError, match="Knowledge directory does not exist"):
+                get_knowledge_directory()
+        finally:
+            del os.environ["CREWAI_KNOWLEDGE_FILE_DIR"]
+
+    def test_relative_path_knowledge_directory(self):
+        """Test that relative paths work correctly."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            sub_dir = Path(temp_dir) / "knowledge_files"
+            sub_dir.mkdir()
+
+            original_cwd = os.getcwd()
+            try:
+                os.chdir(temp_dir)
+                os.environ["CREWAI_KNOWLEDGE_FILE_DIR"] = "knowledge_files"
+
+                result = get_knowledge_directory()
+                assert Path(result).resolve() == sub_dir.resolve()
+            finally:
+                os.chdir(original_cwd)
+                if "CREWAI_KNOWLEDGE_FILE_DIR" in os.environ:
+                    del os.environ["CREWAI_KNOWLEDGE_FILE_DIR"]
+
+    def test_absolute_path_knowledge_directory(self):
+        """Test that absolute paths work correctly."""
+        with tempfile.TemporaryDirectory() as temp_dir:
+            os.environ["CREWAI_KNOWLEDGE_FILE_DIR"] = temp_dir
+            try:
+                result = get_knowledge_directory()
+                assert result == temp_dir
+            finally:
+                del os.environ["CREWAI_KNOWLEDGE_FILE_DIR"]