feat: Add support for external knowledge directory via CREWAI_KNOWLEDGE_FILE_DIR

- Add get_knowledge_directory() utility function following CREWAI_STORAGE_DIR pattern
- Update BaseFileKnowledgeSource, CrewDoclingSource, and ExcelKnowledgeSource to use new function
- Add comprehensive tests for utility function and knowledge source integration
- Maintain backward compatibility with default 'knowledge' directory
- Add proper error handling for non-existent directories

Fixes #3519

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
Devin AI
2025-09-16 00:11:41 +00:00
parent 81bd81e5f5
commit 101cee8a27
6 changed files with 182 additions and 4 deletions

View File

@@ -8,6 +8,7 @@ from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from crewai.knowledge.storage.knowledge_storage import KnowledgeStorage
from crewai.utilities.constants import KNOWLEDGE_DIRECTORY
from crewai.utilities.logger import Logger
from crewai.utilities.paths import get_knowledge_directory
class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
@@ -76,7 +77,7 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC):
def convert_to_path(self, path: Union[Path, str]) -> Path:
    """Convert a path to a Path object.

    Args:
        path: Either a ready-made ``Path`` (returned unchanged) or a string.
            A relative string is resolved against the directory returned by
            ``get_knowledge_directory()``.

    Returns:
        The ``Path`` to read the knowledge file from.
    """
    if isinstance(path, str):
        # Join with pathlib instead of string concatenation: when ``path`` is
        # absolute, the ``/`` operator discards the knowledge-directory prefix,
        # so absolute string paths are used as given.
        return Path(get_knowledge_directory()) / path
    return path
def _process_file_paths(self) -> List[Path]:
"""Convert file_path to a list of Path objects."""

View File

@@ -18,6 +18,7 @@ from pydantic import Field
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from crewai.utilities.constants import KNOWLEDGE_DIRECTORY
from crewai.utilities.logger import Logger
from crewai.utilities.paths import get_knowledge_directory
class CrewDoclingSource(BaseKnowledgeSource):
@@ -110,7 +111,7 @@ class CrewDoclingSource(BaseKnowledgeSource):
except Exception as e:
raise ValueError(f"Invalid URL: {path}. Error: {str(e)}")
else:
local_path = Path(KNOWLEDGE_DIRECTORY + "/" + path)
local_path = Path(get_knowledge_directory() + "/" + path)
if local_path.exists():
processed_paths.append(local_path)
else:

View File

@@ -7,6 +7,7 @@ from pydantic import Field, field_validator
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
from crewai.utilities.constants import KNOWLEDGE_DIRECTORY
from crewai.utilities.logger import Logger
from crewai.utilities.paths import get_knowledge_directory
class ExcelKnowledgeSource(BaseKnowledgeSource):
@@ -128,7 +129,7 @@ class ExcelKnowledgeSource(BaseKnowledgeSource):
def convert_to_path(self, path: Union[Path, str]) -> Path:
    """Convert a path to a Path object.

    Args:
        path: Either a ready-made ``Path`` (returned unchanged) or a string.
            A relative string is resolved against the directory returned by
            ``get_knowledge_directory()``.

    Returns:
        The ``Path`` to read the knowledge file from.
    """
    if isinstance(path, str):
        # Join with pathlib instead of string concatenation: when ``path`` is
        # absolute, the ``/`` operator discards the knowledge-directory prefix,
        # so absolute string paths are used as given.
        return Path(get_knowledge_directory()) / path
    return path
def _import_dependencies(self):
"""Dynamically import dependencies."""

View File

@@ -28,4 +28,17 @@ def get_project_directory_name():
else:
cwd = Path.cwd()
project_directory_name = cwd.name
return project_directory_name
return project_directory_name
def get_knowledge_directory() -> str:
    """Return the knowledge directory path from the environment or the default.

    The directory is read from the ``CREWAI_KNOWLEDGE_FILE_DIR`` environment
    variable. When the variable is unset or empty, the default ``"knowledge"``
    is returned without any filesystem check.

    Returns:
        The configured directory as a string (relative values are returned
        as given, not made absolute), or ``"knowledge"``.

    Raises:
        ValueError: If the configured path does not exist, or exists but is
            not a directory.
    """
    knowledge_dir = os.environ.get("CREWAI_KNOWLEDGE_FILE_DIR")
    if not knowledge_dir:
        return "knowledge"

    knowledge_path = Path(knowledge_dir)
    if not knowledge_path.exists():
        raise ValueError(f"Knowledge directory does not exist: {knowledge_dir}")
    # exists() is also true for regular files; reject those here so the
    # misconfiguration is reported up front rather than as missing files later.
    if not knowledge_path.is_dir():
        raise ValueError(f"Knowledge directory is not a directory: {knowledge_dir}")
    return str(knowledge_path)

View File

@@ -0,0 +1,99 @@
import os
import tempfile
from pathlib import Path
import pytest
from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource
from crewai.knowledge.source.json_knowledge_source import JSONKnowledgeSource
class TestExternalKnowledgeDirectory:
    """Integration tests for file knowledge sources and CREWAI_KNOWLEDGE_FILE_DIR.

    Every test uses pytest's ``monkeypatch`` and ``tmp_path`` fixtures so that
    environment variables, the working directory and files on disk are
    restored or discarded automatically, even when an assertion fails.
    """

    def test_text_file_source_with_external_directory(self, tmp_path, monkeypatch):
        """Test that TextFileKnowledgeSource works with external directory."""
        test_content = "This is a test file for external knowledge directory."
        (tmp_path / "test.txt").write_text(test_content)
        monkeypatch.setenv("CREWAI_KNOWLEDGE_FILE_DIR", str(tmp_path))

        source = TextFileKnowledgeSource(file_paths=["test.txt"])

        assert len(source.content) == 1
        loaded_content = list(source.content.values())[0]
        assert loaded_content == test_content

    def test_json_file_source_with_external_directory(self, tmp_path, monkeypatch):
        """Test that JSONKnowledgeSource works with external directory."""
        import json

        test_data = {"name": "John", "age": 30, "city": "New York"}
        (tmp_path / "test.json").write_text(json.dumps(test_data))
        monkeypatch.setenv("CREWAI_KNOWLEDGE_FILE_DIR", str(tmp_path))

        source = JSONKnowledgeSource(file_paths=["test.json"])

        assert len(source.content) == 1
        loaded_content = list(source.content.values())[0]
        assert "John" in loaded_content
        assert "30" in loaded_content
        assert "New York" in loaded_content

    def test_knowledge_source_fallback_to_default(self, tmp_path, monkeypatch):
        """Test that knowledge sources fall back to default directory when env var not set."""
        monkeypatch.delenv("CREWAI_KNOWLEDGE_FILE_DIR", raising=False)
        # Run from a scratch directory so the default relative "knowledge"
        # folder is created there and not inside the real working directory.
        monkeypatch.chdir(tmp_path)
        knowledge_dir = tmp_path / "knowledge"
        knowledge_dir.mkdir()
        test_content = "This is a test file for default knowledge directory."
        (knowledge_dir / "test_fallback.txt").write_text(test_content)

        source = TextFileKnowledgeSource(file_paths=["test_fallback.txt"])

        assert len(source.content) == 1
        loaded_content = list(source.content.values())[0]
        assert loaded_content == test_content

    def test_knowledge_source_with_absolute_path_ignores_env_var(
        self, tmp_path, monkeypatch
    ):
        """Test that absolute paths ignore the environment variable."""
        file_dir = tmp_path / "files"
        other_dir = tmp_path / "other"
        file_dir.mkdir()
        other_dir.mkdir()
        test_file = file_dir / "test_absolute.txt"
        test_content = "This is a test file with absolute path."
        test_file.write_text(test_content)
        # Point the env var at a different, empty directory: the file can only
        # be found if the absolute path is used as given.
        monkeypatch.setenv("CREWAI_KNOWLEDGE_FILE_DIR", str(other_dir))

        source = TextFileKnowledgeSource(file_paths=[str(test_file)])

        assert len(source.content) == 1
        loaded_content = list(source.content.values())[0]
        assert loaded_content == test_content

    def test_knowledge_source_error_with_invalid_external_directory(
        self, tmp_path, monkeypatch
    ):
        """Test that proper error is raised when external directory doesn't exist."""
        # A never-created child of tmp_path is guaranteed not to exist.
        invalid_dir = tmp_path / "does_not_exist"
        monkeypatch.setenv("CREWAI_KNOWLEDGE_FILE_DIR", str(invalid_dir))

        with pytest.raises(ValueError, match="Knowledge directory does not exist"):
            TextFileKnowledgeSource(file_paths=["test.txt"])

View File

@@ -0,0 +1,63 @@
import os
import tempfile
from pathlib import Path
import pytest
from crewai.utilities.paths import get_knowledge_directory
class TestKnowledgeDirectory:
    """Unit tests for ``get_knowledge_directory``.

    Environment and working-directory changes go through pytest's
    ``monkeypatch`` fixture so they are undone after each test and any value
    of CREWAI_KNOWLEDGE_FILE_DIR set before the test run is preserved.
    """

    def test_default_knowledge_directory(self, monkeypatch):
        """Test that default knowledge directory is returned when env var not set."""
        monkeypatch.delenv("CREWAI_KNOWLEDGE_FILE_DIR", raising=False)

        assert get_knowledge_directory() == "knowledge"

    def test_custom_knowledge_directory(self, tmp_path, monkeypatch):
        """Test that custom directory is returned when env var is set."""
        monkeypatch.setenv("CREWAI_KNOWLEDGE_FILE_DIR", str(tmp_path))

        assert get_knowledge_directory() == str(tmp_path)

    def test_invalid_knowledge_directory(self, tmp_path, monkeypatch):
        """Test that ValueError is raised for non-existent directory."""
        # A never-created child of tmp_path is guaranteed not to exist.
        invalid_dir = tmp_path / "does_not_exist"
        monkeypatch.setenv("CREWAI_KNOWLEDGE_FILE_DIR", str(invalid_dir))

        with pytest.raises(ValueError, match="Knowledge directory does not exist"):
            get_knowledge_directory()

    def test_relative_path_knowledge_directory(self, tmp_path, monkeypatch):
        """Test that relative paths work correctly."""
        sub_dir = tmp_path / "knowledge_files"
        sub_dir.mkdir()
        monkeypatch.chdir(tmp_path)
        monkeypatch.setenv("CREWAI_KNOWLEDGE_FILE_DIR", "knowledge_files")

        result = get_knowledge_directory()

        # Compare resolved paths: the returned value may be relative to the
        # current working directory, and temp dirs can sit behind symlinks.
        assert Path(result).resolve() == sub_dir.resolve()

    def test_absolute_path_knowledge_directory(self, tmp_path, monkeypatch):
        """Test that absolute paths work correctly."""
        monkeypatch.setenv("CREWAI_KNOWLEDGE_FILE_DIR", str(tmp_path))

        result = get_knowledge_directory()

        assert Path(result).is_absolute()
        assert result == str(tmp_path)