From 77065f2151634811352730e6ad284bd844596384 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Tue, 16 Sep 2025 00:20:08 +0000
Subject: [PATCH] fix: Address lint issues in external knowledge directory
 implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Remove unnecessary variable assignment in paths.py (RET504)
- Add proper exception chaining in crew_docling_source.py and excel_knowledge_source.py (B904)
- Use next(iter(...)) instead of list(...)[0] in test files (RUF015)

Note: N805 error about cls vs self in field_validator is a false positive -
Pydantic field validators correctly use cls as first parameter

Co-Authored-By: João <joao@crewai.com>
---
 .../knowledge/source/crew_docling_source.py   | 23 +++++------
 .../source/excel_knowledge_source.py          | 25 +++++-------
 src/crewai/utilities/paths.py                 | 11 ++---
 .../test_external_knowledge_directory.py      | 40 ++++++++++---------
 4 files changed, 47 insertions(+), 52 deletions(-)

diff --git a/src/crewai/knowledge/source/crew_docling_source.py b/src/crewai/knowledge/source/crew_docling_source.py
index db70307a3..a3495774d 100644
--- a/src/crewai/knowledge/source/crew_docling_source.py
+++ b/src/crewai/knowledge/source/crew_docling_source.py
@@ -1,5 +1,5 @@
+from collections.abc import Iterator
 from pathlib import Path
-from typing import Iterator, List, Optional, Union
 from urllib.parse import urlparse
 
 try:
@@ -16,7 +16,6 @@ except ImportError:
 from pydantic import Field
 
 from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
-from crewai.utilities.constants import KNOWLEDGE_DIRECTORY
 from crewai.utilities.logger import Logger
 from crewai.utilities.paths import get_knowledge_directory
 
@@ -36,11 +35,11 @@ class CrewDoclingSource(BaseKnowledgeSource):
 
     _logger: Logger = Logger(verbose=True)
 
-    file_path: Optional[List[Union[Path, str]]] = Field(default=None)
-    file_paths: List[Union[Path, str]] = Field(default_factory=list)
-    chunks: List[str] = Field(default_factory=list)
-    safe_file_paths: List[Union[Path, str]] = Field(default_factory=list)
-    content: List["DoclingDocument"] = Field(default_factory=list)
+    file_path: list[Path | str] | None = Field(default=None)
+    file_paths: list[Path | str] = Field(default_factory=list)
+    chunks: list[str] = Field(default_factory=list)
+    safe_file_paths: list[Path | str] = Field(default_factory=list)
+    content: list["DoclingDocument"] = Field(default_factory=list)
     document_converter: "DocumentConverter" = Field(
         default_factory=lambda: DocumentConverter(
             allowed_formats=[
@@ -67,7 +66,7 @@ class CrewDoclingSource(BaseKnowledgeSource):
         self.safe_file_paths = self.validate_content()
         self.content = self._load_content()
 
-    def _load_content(self) -> List["DoclingDocument"]:
+    def _load_content(self) -> list["DoclingDocument"]:
         try:
             return self._convert_source_to_docling_documents()
         except ConversionError as e:
@@ -89,7 +88,7 @@ class CrewDoclingSource(BaseKnowledgeSource):
             self.chunks.extend(list(new_chunks_iterable))
         self._save_documents()
 
-    def _convert_source_to_docling_documents(self) -> List["DoclingDocument"]:
+    def _convert_source_to_docling_documents(self) -> list["DoclingDocument"]:
         conv_results_iter = self.document_converter.convert_all(self.safe_file_paths)
         return [result.document for result in conv_results_iter]
 
@@ -98,8 +97,8 @@ class CrewDoclingSource(BaseKnowledgeSource):
         for chunk in chunker.chunk(doc):
             yield chunk.text
 
-    def validate_content(self) -> List[Union[Path, str]]:
-        processed_paths: List[Union[Path, str]] = []
+    def validate_content(self) -> list[Path | str]:
+        processed_paths: list[Path | str] = []
         for path in self.file_paths:
             if isinstance(path, str):
                 if path.startswith(("http://", "https://")):
@@ -109,7 +108,7 @@ class CrewDoclingSource(BaseKnowledgeSource):
                         else:
                             raise ValueError(f"Invalid URL format: {path}")
                     except Exception as e:
-                        raise ValueError(f"Invalid URL: {path}. Error: {str(e)}")
+                        raise ValueError(f"Invalid URL: {path}. Error: {e!s}") from e
                 else:
                     local_path = Path(get_knowledge_directory() + "/" + path)
                     if local_path.exists():
diff --git a/src/crewai/knowledge/source/excel_knowledge_source.py b/src/crewai/knowledge/source/excel_knowledge_source.py
index 201f8f61c..363f5d4e2 100644
--- a/src/crewai/knowledge/source/excel_knowledge_source.py
+++ b/src/crewai/knowledge/source/excel_knowledge_source.py
@@ -1,11 +1,8 @@
 from pathlib import Path
-from typing import Dict, Iterator, List, Optional, Union
-from urllib.parse import urlparse
 
 from pydantic import Field, field_validator
 
 from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
-from crewai.utilities.constants import KNOWLEDGE_DIRECTORY
 from crewai.utilities.logger import Logger
 from crewai.utilities.paths import get_knowledge_directory
 
@@ -17,16 +14,16 @@ class ExcelKnowledgeSource(BaseKnowledgeSource):
 
     _logger: Logger = Logger(verbose=True)
 
-    file_path: Optional[Union[Path, List[Path], str, List[str]]] = Field(
+    file_path: Path | list[Path] | str | list[str] | None = Field(
         default=None,
         description="[Deprecated] The path to the file. Use file_paths instead.",
     )
-    file_paths: Optional[Union[Path, List[Path], str, List[str]]] = Field(
+    file_paths: Path | list[Path] | str | list[str] | None = Field(
         default_factory=list, description="The path to the file"
     )
-    chunks: List[str] = Field(default_factory=list)
-    content: Dict[Path, Dict[str, str]] = Field(default_factory=dict)
-    safe_file_paths: List[Path] = Field(default_factory=list)
+    chunks: list[str] = Field(default_factory=list)
+    content: dict[Path, dict[str, str]] = Field(default_factory=dict)
+    safe_file_paths: list[Path] = Field(default_factory=list)
 
     @field_validator("file_path", "file_paths", mode="before")
     def validate_file_path(cls, v, info):
@@ -42,7 +39,7 @@ class ExcelKnowledgeSource(BaseKnowledgeSource):
             raise ValueError("Either file_path or file_paths must be provided")
         return v
 
-    def _process_file_paths(self) -> List[Path]:
+    def _process_file_paths(self) -> list[Path]:
         """Convert file_path to a list of Path objects."""
 
         if hasattr(self, "file_path") and self.file_path is not None:
@@ -57,7 +54,7 @@ class ExcelKnowledgeSource(BaseKnowledgeSource):
             raise ValueError("Your source must be provided with a file_paths: []")
 
         # Convert single path to list
-        path_list: List[Union[Path, str]] = (
+        path_list: list[Path | str] = (
             [self.file_paths]
             if isinstance(self.file_paths, (str, Path))
             else list(self.file_paths)
@@ -101,7 +98,7 @@ class ExcelKnowledgeSource(BaseKnowledgeSource):
         self.validate_content()
         self.content = self._load_content()
 
-    def _load_content(self) -> Dict[Path, Dict[str, str]]:
+    def _load_content(self) -> dict[Path, dict[str, str]]:
         """Load and preprocess Excel file content from multiple sheets.
 
         Each sheet's content is converted to CSV format and stored.
@@ -127,7 +124,7 @@ class ExcelKnowledgeSource(BaseKnowledgeSource):
             content_dict[file_path] = sheet_dict
         return content_dict
 
-    def convert_to_path(self, path: Union[Path, str]) -> Path:
+    def convert_to_path(self, path: Path | str) -> Path:
         """Convert a path to a Path object."""
         return Path(get_knowledge_directory() + "/" + path) if isinstance(path, str) else path
 
@@ -141,7 +138,7 @@ class ExcelKnowledgeSource(BaseKnowledgeSource):
             missing_package = str(e).split()[-1]
             raise ImportError(
                 f"{missing_package} is not installed. Please install it with: pip install {missing_package}"
-            )
+            ) from e
 
     def add(self) -> None:
         """
@@ -162,7 +159,7 @@ class ExcelKnowledgeSource(BaseKnowledgeSource):
         self.chunks.extend(new_chunks)
         self._save_documents()
 
-    def _chunk_text(self, text: str) -> List[str]:
+    def _chunk_text(self, text: str) -> list[str]:
         """Utility method to split text into chunks."""
         return [
             text[i : i + self.chunk_size]
diff --git a/src/crewai/utilities/paths.py b/src/crewai/utilities/paths.py
index d780fa65a..b91ba013d 100644
--- a/src/crewai/utilities/paths.py
+++ b/src/crewai/utilities/paths.py
@@ -25,20 +25,17 @@ def get_project_directory_name():
 
     if project_directory_name:
         return project_directory_name
-    else:
-        cwd = Path.cwd()
-        project_directory_name = cwd.name
-        return project_directory_name
+    cwd = Path.cwd()
+    return cwd.name
 
 
 def get_knowledge_directory():
     """Returns the knowledge directory path from environment variable or default."""
     knowledge_dir = os.environ.get("CREWAI_KNOWLEDGE_FILE_DIR")
-    
+
     if knowledge_dir:
         knowledge_path = Path(knowledge_dir)
         if not knowledge_path.exists():
             raise ValueError(f"Knowledge directory does not exist: {knowledge_dir}")
         return str(knowledge_path)
-    else:
-        return "knowledge"
+    return "knowledge"
diff --git a/tests/knowledge/test_external_knowledge_directory.py b/tests/knowledge/test_external_knowledge_directory.py
index 50807468c..b9ca8ed89 100644
--- a/tests/knowledge/test_external_knowledge_directory.py
+++ b/tests/knowledge/test_external_knowledge_directory.py
@@ -1,9 +1,11 @@
 import os
 import tempfile
 from pathlib import Path
+
 import pytest
-from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource
+
 from crewai.knowledge.source.json_knowledge_source import JSONKnowledgeSource
+from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource
 
 
 class TestExternalKnowledgeDirectory:
@@ -13,15 +15,15 @@ class TestExternalKnowledgeDirectory:
             test_file = Path(temp_dir) / "test.txt"
             test_content = "This is a test file for external knowledge directory."
             test_file.write_text(test_content)
-            
+
             os.environ["CREWAI_KNOWLEDGE_FILE_DIR"] = temp_dir
             try:
                 source = TextFileKnowledgeSource(file_paths=["test.txt"])
-                
+
                 assert len(source.content) == 1
-                loaded_content = list(source.content.values())[0]
+                loaded_content = next(iter(source.content.values()))
                 assert loaded_content == test_content
-                
+
             finally:
                 del os.environ["CREWAI_KNOWLEDGE_FILE_DIR"]
 
@@ -32,17 +34,17 @@ class TestExternalKnowledgeDirectory:
             test_data = {"name": "John", "age": 30, "city": "New York"}
             import json
             test_file.write_text(json.dumps(test_data))
-            
+
             os.environ["CREWAI_KNOWLEDGE_FILE_DIR"] = temp_dir
             try:
                 source = JSONKnowledgeSource(file_paths=["test.json"])
-                
+
                 assert len(source.content) == 1
-                loaded_content = list(source.content.values())[0]
+                loaded_content = next(iter(source.content.values()))
                 assert "John" in loaded_content
                 assert "30" in loaded_content
                 assert "New York" in loaded_content
-                
+
             finally:
                 del os.environ["CREWAI_KNOWLEDGE_FILE_DIR"]
 
@@ -50,21 +52,21 @@ class TestExternalKnowledgeDirectory:
         """Test that knowledge sources fall back to default directory when env var not set."""
         if "CREWAI_KNOWLEDGE_FILE_DIR" in os.environ:
             del os.environ["CREWAI_KNOWLEDGE_FILE_DIR"]
-        
+
         knowledge_dir = Path("knowledge")
         knowledge_dir.mkdir(exist_ok=True)
         test_file = knowledge_dir / "test_fallback.txt"
         test_content = "This is a test file for default knowledge directory."
-        
+
         try:
             test_file.write_text(test_content)
-            
+
             source = TextFileKnowledgeSource(file_paths=["test_fallback.txt"])
-            
+
             assert len(source.content) == 1
-            loaded_content = list(source.content.values())[0]
+            loaded_content = next(iter(source.content.values()))
             assert loaded_content == test_content
-            
+
         finally:
             if test_file.exists():
                 test_file.unlink()
@@ -75,16 +77,16 @@ class TestExternalKnowledgeDirectory:
             test_file = Path(temp_dir) / "test_absolute.txt"
             test_content = "This is a test file with absolute path."
             test_file.write_text(test_content)
-            
+
             with tempfile.TemporaryDirectory() as other_dir:
                 os.environ["CREWAI_KNOWLEDGE_FILE_DIR"] = other_dir
                 try:
                     source = TextFileKnowledgeSource(file_paths=[str(test_file)])
-                    
+
                     assert len(source.content) == 1
-                    loaded_content = list(source.content.values())[0]
+                    loaded_content = next(iter(source.content.values()))
                     assert loaded_content == test_content
-                    
+
                 finally:
                     del os.environ["CREWAI_KNOWLEDGE_FILE_DIR"]