From 77065f2151634811352730e6ad284bd844596384 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Tue, 16 Sep 2025 00:20:08 +0000 Subject: [PATCH] fix: Address lint issues in external knowledge directory implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Remove unnecessary variable assignment in paths.py (RET504) - Add proper exception chaining in crew_docling_source.py and excel_knowledge_source.py (B904) - Use next(iter(...)) instead of list(...)[0] in test files (RUF015) Note: N805 error about cls vs self in field_validator is a false positive - Pydantic field validators correctly use cls as first parameter Co-Authored-By: João --- .../knowledge/source/crew_docling_source.py | 23 +++++------ .../source/excel_knowledge_source.py | 25 +++++------- src/crewai/utilities/paths.py | 11 ++--- .../test_external_knowledge_directory.py | 40 ++++++++++--------- 4 files changed, 47 insertions(+), 52 deletions(-) diff --git a/src/crewai/knowledge/source/crew_docling_source.py b/src/crewai/knowledge/source/crew_docling_source.py index db70307a3..a3495774d 100644 --- a/src/crewai/knowledge/source/crew_docling_source.py +++ b/src/crewai/knowledge/source/crew_docling_source.py @@ -1,5 +1,5 @@ +from collections.abc import Iterator from pathlib import Path -from typing import Iterator, List, Optional, Union from urllib.parse import urlparse try: @@ -16,7 +16,6 @@ except ImportError: from pydantic import Field from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource -from crewai.utilities.constants import KNOWLEDGE_DIRECTORY from crewai.utilities.logger import Logger from crewai.utilities.paths import get_knowledge_directory @@ -36,11 +35,11 @@ class CrewDoclingSource(BaseKnowledgeSource): _logger: Logger = Logger(verbose=True) - file_path: Optional[List[Union[Path, str]]] = Field(default=None) - file_paths: List[Union[Path, str]] = Field(default_factory=list) - chunks: List[str] = Field(default_factory=list) - safe_file_paths: List[Union[Path, str]] = Field(default_factory=list) - content: List["DoclingDocument"] = Field(default_factory=list) + file_path: list[Path | str] | None = Field(default=None) + file_paths: list[Path | str] = Field(default_factory=list) + chunks: list[str] = Field(default_factory=list) + safe_file_paths: list[Path | str] = Field(default_factory=list) + content: list["DoclingDocument"] = Field(default_factory=list) document_converter: "DocumentConverter" = Field( default_factory=lambda: DocumentConverter( allowed_formats=[ @@ -67,7 +66,7 @@ class CrewDoclingSource(BaseKnowledgeSource): self.safe_file_paths = self.validate_content() self.content = self._load_content() - def _load_content(self) -> List["DoclingDocument"]: + def _load_content(self) -> list["DoclingDocument"]: try: return self._convert_source_to_docling_documents() except ConversionError as e: @@ -89,7 +88,7 @@ class CrewDoclingSource(BaseKnowledgeSource): self.chunks.extend(list(new_chunks_iterable)) self._save_documents() - def _convert_source_to_docling_documents(self) -> List["DoclingDocument"]: + def _convert_source_to_docling_documents(self) -> list["DoclingDocument"]: conv_results_iter = self.document_converter.convert_all(self.safe_file_paths) return [result.document for result in conv_results_iter] @@ -98,8 +97,8 @@ class CrewDoclingSource(BaseKnowledgeSource): for chunk in chunker.chunk(doc): yield chunk.text - def validate_content(self) -> List[Union[Path, str]]: - processed_paths: List[Union[Path, str]] = [] + def validate_content(self) -> list[Path | str]: + processed_paths: list[Path | str] = [] for path in self.file_paths: if isinstance(path, str): if path.startswith(("http://", "https://")): @@ -109,7 +108,7 @@ class CrewDoclingSource(BaseKnowledgeSource): else: raise ValueError(f"Invalid URL format: {path}") except Exception as e: - raise ValueError(f"Invalid URL: {path}. Error: {str(e)}") + raise ValueError(f"Invalid URL: {path}. Error: {e!s}") from e else: local_path = Path(get_knowledge_directory() + "/" + path) if local_path.exists(): diff --git a/src/crewai/knowledge/source/excel_knowledge_source.py b/src/crewai/knowledge/source/excel_knowledge_source.py index 201f8f61c..363f5d4e2 100644 --- a/src/crewai/knowledge/source/excel_knowledge_source.py +++ b/src/crewai/knowledge/source/excel_knowledge_source.py @@ -1,11 +1,8 @@ from pathlib import Path -from typing import Dict, Iterator, List, Optional, Union -from urllib.parse import urlparse from pydantic import Field, field_validator from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource -from crewai.utilities.constants import KNOWLEDGE_DIRECTORY from crewai.utilities.logger import Logger from crewai.utilities.paths import get_knowledge_directory @@ -17,16 +14,16 @@ class ExcelKnowledgeSource(BaseKnowledgeSource): _logger: Logger = Logger(verbose=True) - file_path: Optional[Union[Path, List[Path], str, List[str]]] = Field( + file_path: Path | list[Path] | str | list[str] | None = Field( default=None, description="[Deprecated] The path to the file. Use file_paths instead.", ) - file_paths: Optional[Union[Path, List[Path], str, List[str]]] = Field( + file_paths: Path | list[Path] | str | list[str] | None = Field( default_factory=list, description="The path to the file" ) - chunks: List[str] = Field(default_factory=list) - content: Dict[Path, Dict[str, str]] = Field(default_factory=dict) - safe_file_paths: List[Path] = Field(default_factory=list) + chunks: list[str] = Field(default_factory=list) + content: dict[Path, dict[str, str]] = Field(default_factory=dict) + safe_file_paths: list[Path] = Field(default_factory=list) @field_validator("file_path", "file_paths", mode="before") def validate_file_path(cls, v, info): @@ -42,7 +39,7 @@ class ExcelKnowledgeSource(BaseKnowledgeSource): raise ValueError("Either file_path or file_paths must be provided") return v - def _process_file_paths(self) -> List[Path]: + def _process_file_paths(self) -> list[Path]: """Convert file_path to a list of Path objects.""" if hasattr(self, "file_path") and self.file_path is not None: @@ -57,7 +54,7 @@ class ExcelKnowledgeSource(BaseKnowledgeSource): raise ValueError("Your source must be provided with a file_paths: []") # Convert single path to list - path_list: List[Union[Path, str]] = ( + path_list: list[Path | str] = ( [self.file_paths] if isinstance(self.file_paths, (str, Path)) else list(self.file_paths) @@ -101,7 +98,7 @@ class ExcelKnowledgeSource(BaseKnowledgeSource): self.validate_content() self.content = self._load_content() - def _load_content(self) -> Dict[Path, Dict[str, str]]: + def _load_content(self) -> dict[Path, dict[str, str]]: """Load and preprocess Excel file content from multiple sheets. Each sheet's content is converted to CSV format and stored. @@ -127,7 +124,7 @@ class ExcelKnowledgeSource(BaseKnowledgeSource): content_dict[file_path] = sheet_dict return content_dict - def convert_to_path(self, path: Union[Path, str]) -> Path: + def convert_to_path(self, path: Path | str) -> Path: """Convert a path to a Path object.""" return Path(get_knowledge_directory() + "/" + path) if isinstance(path, str) else path @@ -141,7 +138,7 @@ class ExcelKnowledgeSource(BaseKnowledgeSource): missing_package = str(e).split()[-1] raise ImportError( f"{missing_package} is not installed. Please install it with: pip install {missing_package}" - ) + ) from e def add(self) -> None: """ @@ -162,7 +159,7 @@ class ExcelKnowledgeSource(BaseKnowledgeSource): self.chunks.extend(new_chunks) self._save_documents() - def _chunk_text(self, text: str) -> List[str]: + def _chunk_text(self, text: str) -> list[str]: """Utility method to split text into chunks.""" return [ text[i : i + self.chunk_size] diff --git a/src/crewai/utilities/paths.py b/src/crewai/utilities/paths.py index d780fa65a..b91ba013d 100644 --- a/src/crewai/utilities/paths.py +++ b/src/crewai/utilities/paths.py @@ -25,20 +25,17 @@ def get_project_directory_name(): if project_directory_name: return project_directory_name - else: - cwd = Path.cwd() - project_directory_name = cwd.name - return project_directory_name + cwd = Path.cwd() + return cwd.name def get_knowledge_directory(): """Returns the knowledge directory path from environment variable or default.""" knowledge_dir = os.environ.get("CREWAI_KNOWLEDGE_FILE_DIR") - + if knowledge_dir: knowledge_path = Path(knowledge_dir) if not knowledge_path.exists(): raise ValueError(f"Knowledge directory does not exist: {knowledge_dir}") return str(knowledge_path) - else: - return "knowledge" + return "knowledge" diff --git a/tests/knowledge/test_external_knowledge_directory.py b/tests/knowledge/test_external_knowledge_directory.py index 50807468c..b9ca8ed89 100644 --- a/tests/knowledge/test_external_knowledge_directory.py +++ b/tests/knowledge/test_external_knowledge_directory.py @@ -1,9 +1,11 @@ import os import tempfile from pathlib import Path + import pytest -from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource + from crewai.knowledge.source.json_knowledge_source import JSONKnowledgeSource +from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource class TestExternalKnowledgeDirectory: @@ -13,15 +15,15 @@ class TestExternalKnowledgeDirectory: test_file = Path(temp_dir) / "test.txt" test_content = "This is a test file for external knowledge directory." test_file.write_text(test_content) - + os.environ["CREWAI_KNOWLEDGE_FILE_DIR"] = temp_dir try: source = TextFileKnowledgeSource(file_paths=["test.txt"]) - + assert len(source.content) == 1 - loaded_content = list(source.content.values())[0] + loaded_content = next(iter(source.content.values())) assert loaded_content == test_content - + finally: del os.environ["CREWAI_KNOWLEDGE_FILE_DIR"] @@ -32,17 +34,17 @@ class TestExternalKnowledgeDirectory: test_data = {"name": "John", "age": 30, "city": "New York"} import json test_file.write_text(json.dumps(test_data)) - + os.environ["CREWAI_KNOWLEDGE_FILE_DIR"] = temp_dir try: source = JSONKnowledgeSource(file_paths=["test.json"]) - + assert len(source.content) == 1 - loaded_content = list(source.content.values())[0] + loaded_content = next(iter(source.content.values())) assert "John" in loaded_content assert "30" in loaded_content assert "New York" in loaded_content - + finally: del os.environ["CREWAI_KNOWLEDGE_FILE_DIR"] @@ -50,21 +52,21 @@ class TestExternalKnowledgeDirectory: """Test that knowledge sources fall back to default directory when env var not set.""" if "CREWAI_KNOWLEDGE_FILE_DIR" in os.environ: del os.environ["CREWAI_KNOWLEDGE_FILE_DIR"] - + knowledge_dir = Path("knowledge") knowledge_dir.mkdir(exist_ok=True) test_file = knowledge_dir / "test_fallback.txt" test_content = "This is a test file for default knowledge directory." - + try: test_file.write_text(test_content) - + source = TextFileKnowledgeSource(file_paths=["test_fallback.txt"]) - + assert len(source.content) == 1 - loaded_content = list(source.content.values())[0] + loaded_content = next(iter(source.content.values())) assert loaded_content == test_content - + finally: if test_file.exists(): test_file.unlink() @@ -75,16 +77,16 @@ class TestExternalKnowledgeDirectory: test_file = Path(temp_dir) / "test_absolute.txt" test_content = "This is a test file with absolute path." test_file.write_text(test_content) - + with tempfile.TemporaryDirectory() as other_dir: os.environ["CREWAI_KNOWLEDGE_FILE_DIR"] = other_dir try: source = TextFileKnowledgeSource(file_paths=[str(test_file)]) - + assert len(source.content) == 1 - loaded_content = list(source.content.values())[0] + loaded_content = next(iter(source.content.values())) assert loaded_content == test_content - + finally: del os.environ["CREWAI_KNOWLEDGE_FILE_DIR"]