mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-05-03 16:22:49 +00:00
fix: Address lint issues in external knowledge directory implementation
- Remove unnecessary variable assignment in paths.py (RET504) - Add proper exception chaining in crew_docling_source.py and excel_knowledge_source.py (B904) - Use next(iter(...)) instead of list(...)[0] in test files (RUF015) Note: N805 error about cls vs self in field_validator is a false positive - Pydantic field validators correctly use cls as first parameter Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
@@ -1,5 +1,5 @@
|
|||||||
|
from collections.abc import Iterator
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Iterator, List, Optional, Union
|
|
||||||
from urllib.parse import urlparse
|
from urllib.parse import urlparse
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -16,7 +16,6 @@ except ImportError:
|
|||||||
from pydantic import Field
|
from pydantic import Field
|
||||||
|
|
||||||
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||||
from crewai.utilities.constants import KNOWLEDGE_DIRECTORY
|
|
||||||
from crewai.utilities.logger import Logger
|
from crewai.utilities.logger import Logger
|
||||||
from crewai.utilities.paths import get_knowledge_directory
|
from crewai.utilities.paths import get_knowledge_directory
|
||||||
|
|
||||||
@@ -36,11 +35,11 @@ class CrewDoclingSource(BaseKnowledgeSource):
|
|||||||
|
|
||||||
_logger: Logger = Logger(verbose=True)
|
_logger: Logger = Logger(verbose=True)
|
||||||
|
|
||||||
file_path: Optional[List[Union[Path, str]]] = Field(default=None)
|
file_path: list[Path | str] | None = Field(default=None)
|
||||||
file_paths: List[Union[Path, str]] = Field(default_factory=list)
|
file_paths: list[Path | str] = Field(default_factory=list)
|
||||||
chunks: List[str] = Field(default_factory=list)
|
chunks: list[str] = Field(default_factory=list)
|
||||||
safe_file_paths: List[Union[Path, str]] = Field(default_factory=list)
|
safe_file_paths: list[Path | str] = Field(default_factory=list)
|
||||||
content: List["DoclingDocument"] = Field(default_factory=list)
|
content: list["DoclingDocument"] = Field(default_factory=list)
|
||||||
document_converter: "DocumentConverter" = Field(
|
document_converter: "DocumentConverter" = Field(
|
||||||
default_factory=lambda: DocumentConverter(
|
default_factory=lambda: DocumentConverter(
|
||||||
allowed_formats=[
|
allowed_formats=[
|
||||||
@@ -67,7 +66,7 @@ class CrewDoclingSource(BaseKnowledgeSource):
|
|||||||
self.safe_file_paths = self.validate_content()
|
self.safe_file_paths = self.validate_content()
|
||||||
self.content = self._load_content()
|
self.content = self._load_content()
|
||||||
|
|
||||||
def _load_content(self) -> List["DoclingDocument"]:
|
def _load_content(self) -> list["DoclingDocument"]:
|
||||||
try:
|
try:
|
||||||
return self._convert_source_to_docling_documents()
|
return self._convert_source_to_docling_documents()
|
||||||
except ConversionError as e:
|
except ConversionError as e:
|
||||||
@@ -89,7 +88,7 @@ class CrewDoclingSource(BaseKnowledgeSource):
|
|||||||
self.chunks.extend(list(new_chunks_iterable))
|
self.chunks.extend(list(new_chunks_iterable))
|
||||||
self._save_documents()
|
self._save_documents()
|
||||||
|
|
||||||
def _convert_source_to_docling_documents(self) -> List["DoclingDocument"]:
|
def _convert_source_to_docling_documents(self) -> list["DoclingDocument"]:
|
||||||
conv_results_iter = self.document_converter.convert_all(self.safe_file_paths)
|
conv_results_iter = self.document_converter.convert_all(self.safe_file_paths)
|
||||||
return [result.document for result in conv_results_iter]
|
return [result.document for result in conv_results_iter]
|
||||||
|
|
||||||
@@ -98,8 +97,8 @@ class CrewDoclingSource(BaseKnowledgeSource):
|
|||||||
for chunk in chunker.chunk(doc):
|
for chunk in chunker.chunk(doc):
|
||||||
yield chunk.text
|
yield chunk.text
|
||||||
|
|
||||||
def validate_content(self) -> List[Union[Path, str]]:
|
def validate_content(self) -> list[Path | str]:
|
||||||
processed_paths: List[Union[Path, str]] = []
|
processed_paths: list[Path | str] = []
|
||||||
for path in self.file_paths:
|
for path in self.file_paths:
|
||||||
if isinstance(path, str):
|
if isinstance(path, str):
|
||||||
if path.startswith(("http://", "https://")):
|
if path.startswith(("http://", "https://")):
|
||||||
@@ -109,7 +108,7 @@ class CrewDoclingSource(BaseKnowledgeSource):
|
|||||||
else:
|
else:
|
||||||
raise ValueError(f"Invalid URL format: {path}")
|
raise ValueError(f"Invalid URL format: {path}")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise ValueError(f"Invalid URL: {path}. Error: {str(e)}")
|
raise ValueError(f"Invalid URL: {path}. Error: {e!s}") from e
|
||||||
else:
|
else:
|
||||||
local_path = Path(get_knowledge_directory() + "/" + path)
|
local_path = Path(get_knowledge_directory() + "/" + path)
|
||||||
if local_path.exists():
|
if local_path.exists():
|
||||||
|
|||||||
@@ -1,11 +1,8 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Dict, Iterator, List, Optional, Union
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
|
|
||||||
from pydantic import Field, field_validator
|
from pydantic import Field, field_validator
|
||||||
|
|
||||||
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
|
||||||
from crewai.utilities.constants import KNOWLEDGE_DIRECTORY
|
|
||||||
from crewai.utilities.logger import Logger
|
from crewai.utilities.logger import Logger
|
||||||
from crewai.utilities.paths import get_knowledge_directory
|
from crewai.utilities.paths import get_knowledge_directory
|
||||||
|
|
||||||
@@ -17,16 +14,16 @@ class ExcelKnowledgeSource(BaseKnowledgeSource):
|
|||||||
|
|
||||||
_logger: Logger = Logger(verbose=True)
|
_logger: Logger = Logger(verbose=True)
|
||||||
|
|
||||||
file_path: Optional[Union[Path, List[Path], str, List[str]]] = Field(
|
file_path: Path | list[Path] | str | list[str] | None = Field(
|
||||||
default=None,
|
default=None,
|
||||||
description="[Deprecated] The path to the file. Use file_paths instead.",
|
description="[Deprecated] The path to the file. Use file_paths instead.",
|
||||||
)
|
)
|
||||||
file_paths: Optional[Union[Path, List[Path], str, List[str]]] = Field(
|
file_paths: Path | list[Path] | str | list[str] | None = Field(
|
||||||
default_factory=list, description="The path to the file"
|
default_factory=list, description="The path to the file"
|
||||||
)
|
)
|
||||||
chunks: List[str] = Field(default_factory=list)
|
chunks: list[str] = Field(default_factory=list)
|
||||||
content: Dict[Path, Dict[str, str]] = Field(default_factory=dict)
|
content: dict[Path, dict[str, str]] = Field(default_factory=dict)
|
||||||
safe_file_paths: List[Path] = Field(default_factory=list)
|
safe_file_paths: list[Path] = Field(default_factory=list)
|
||||||
|
|
||||||
@field_validator("file_path", "file_paths", mode="before")
|
@field_validator("file_path", "file_paths", mode="before")
|
||||||
def validate_file_path(cls, v, info):
|
def validate_file_path(cls, v, info):
|
||||||
@@ -42,7 +39,7 @@ class ExcelKnowledgeSource(BaseKnowledgeSource):
|
|||||||
raise ValueError("Either file_path or file_paths must be provided")
|
raise ValueError("Either file_path or file_paths must be provided")
|
||||||
return v
|
return v
|
||||||
|
|
||||||
def _process_file_paths(self) -> List[Path]:
|
def _process_file_paths(self) -> list[Path]:
|
||||||
"""Convert file_path to a list of Path objects."""
|
"""Convert file_path to a list of Path objects."""
|
||||||
|
|
||||||
if hasattr(self, "file_path") and self.file_path is not None:
|
if hasattr(self, "file_path") and self.file_path is not None:
|
||||||
@@ -57,7 +54,7 @@ class ExcelKnowledgeSource(BaseKnowledgeSource):
|
|||||||
raise ValueError("Your source must be provided with a file_paths: []")
|
raise ValueError("Your source must be provided with a file_paths: []")
|
||||||
|
|
||||||
# Convert single path to list
|
# Convert single path to list
|
||||||
path_list: List[Union[Path, str]] = (
|
path_list: list[Path | str] = (
|
||||||
[self.file_paths]
|
[self.file_paths]
|
||||||
if isinstance(self.file_paths, (str, Path))
|
if isinstance(self.file_paths, (str, Path))
|
||||||
else list(self.file_paths)
|
else list(self.file_paths)
|
||||||
@@ -101,7 +98,7 @@ class ExcelKnowledgeSource(BaseKnowledgeSource):
|
|||||||
self.validate_content()
|
self.validate_content()
|
||||||
self.content = self._load_content()
|
self.content = self._load_content()
|
||||||
|
|
||||||
def _load_content(self) -> Dict[Path, Dict[str, str]]:
|
def _load_content(self) -> dict[Path, dict[str, str]]:
|
||||||
"""Load and preprocess Excel file content from multiple sheets.
|
"""Load and preprocess Excel file content from multiple sheets.
|
||||||
|
|
||||||
Each sheet's content is converted to CSV format and stored.
|
Each sheet's content is converted to CSV format and stored.
|
||||||
@@ -127,7 +124,7 @@ class ExcelKnowledgeSource(BaseKnowledgeSource):
|
|||||||
content_dict[file_path] = sheet_dict
|
content_dict[file_path] = sheet_dict
|
||||||
return content_dict
|
return content_dict
|
||||||
|
|
||||||
def convert_to_path(self, path: Union[Path, str]) -> Path:
|
def convert_to_path(self, path: Path | str) -> Path:
|
||||||
"""Convert a path to a Path object."""
|
"""Convert a path to a Path object."""
|
||||||
return Path(get_knowledge_directory() + "/" + path) if isinstance(path, str) else path
|
return Path(get_knowledge_directory() + "/" + path) if isinstance(path, str) else path
|
||||||
|
|
||||||
@@ -141,7 +138,7 @@ class ExcelKnowledgeSource(BaseKnowledgeSource):
|
|||||||
missing_package = str(e).split()[-1]
|
missing_package = str(e).split()[-1]
|
||||||
raise ImportError(
|
raise ImportError(
|
||||||
f"{missing_package} is not installed. Please install it with: pip install {missing_package}"
|
f"{missing_package} is not installed. Please install it with: pip install {missing_package}"
|
||||||
)
|
) from e
|
||||||
|
|
||||||
def add(self) -> None:
|
def add(self) -> None:
|
||||||
"""
|
"""
|
||||||
@@ -162,7 +159,7 @@ class ExcelKnowledgeSource(BaseKnowledgeSource):
|
|||||||
self.chunks.extend(new_chunks)
|
self.chunks.extend(new_chunks)
|
||||||
self._save_documents()
|
self._save_documents()
|
||||||
|
|
||||||
def _chunk_text(self, text: str) -> List[str]:
|
def _chunk_text(self, text: str) -> list[str]:
|
||||||
"""Utility method to split text into chunks."""
|
"""Utility method to split text into chunks."""
|
||||||
return [
|
return [
|
||||||
text[i : i + self.chunk_size]
|
text[i : i + self.chunk_size]
|
||||||
|
|||||||
@@ -25,10 +25,8 @@ def get_project_directory_name():
|
|||||||
|
|
||||||
if project_directory_name:
|
if project_directory_name:
|
||||||
return project_directory_name
|
return project_directory_name
|
||||||
else:
|
|
||||||
cwd = Path.cwd()
|
cwd = Path.cwd()
|
||||||
project_directory_name = cwd.name
|
return cwd.name
|
||||||
return project_directory_name
|
|
||||||
|
|
||||||
|
|
||||||
def get_knowledge_directory():
|
def get_knowledge_directory():
|
||||||
@@ -40,5 +38,4 @@ def get_knowledge_directory():
|
|||||||
if not knowledge_path.exists():
|
if not knowledge_path.exists():
|
||||||
raise ValueError(f"Knowledge directory does not exist: {knowledge_dir}")
|
raise ValueError(f"Knowledge directory does not exist: {knowledge_dir}")
|
||||||
return str(knowledge_path)
|
return str(knowledge_path)
|
||||||
else:
|
|
||||||
return "knowledge"
|
return "knowledge"
|
||||||
|
|||||||
@@ -1,9 +1,11 @@
|
|||||||
import os
|
import os
|
||||||
import tempfile
|
import tempfile
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource
|
|
||||||
from crewai.knowledge.source.json_knowledge_source import JSONKnowledgeSource
|
from crewai.knowledge.source.json_knowledge_source import JSONKnowledgeSource
|
||||||
|
from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource
|
||||||
|
|
||||||
|
|
||||||
class TestExternalKnowledgeDirectory:
|
class TestExternalKnowledgeDirectory:
|
||||||
@@ -19,7 +21,7 @@ class TestExternalKnowledgeDirectory:
|
|||||||
source = TextFileKnowledgeSource(file_paths=["test.txt"])
|
source = TextFileKnowledgeSource(file_paths=["test.txt"])
|
||||||
|
|
||||||
assert len(source.content) == 1
|
assert len(source.content) == 1
|
||||||
loaded_content = list(source.content.values())[0]
|
loaded_content = next(iter(source.content.values()))
|
||||||
assert loaded_content == test_content
|
assert loaded_content == test_content
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
@@ -38,7 +40,7 @@ class TestExternalKnowledgeDirectory:
|
|||||||
source = JSONKnowledgeSource(file_paths=["test.json"])
|
source = JSONKnowledgeSource(file_paths=["test.json"])
|
||||||
|
|
||||||
assert len(source.content) == 1
|
assert len(source.content) == 1
|
||||||
loaded_content = list(source.content.values())[0]
|
loaded_content = next(iter(source.content.values()))
|
||||||
assert "John" in loaded_content
|
assert "John" in loaded_content
|
||||||
assert "30" in loaded_content
|
assert "30" in loaded_content
|
||||||
assert "New York" in loaded_content
|
assert "New York" in loaded_content
|
||||||
@@ -62,7 +64,7 @@ class TestExternalKnowledgeDirectory:
|
|||||||
source = TextFileKnowledgeSource(file_paths=["test_fallback.txt"])
|
source = TextFileKnowledgeSource(file_paths=["test_fallback.txt"])
|
||||||
|
|
||||||
assert len(source.content) == 1
|
assert len(source.content) == 1
|
||||||
loaded_content = list(source.content.values())[0]
|
loaded_content = next(iter(source.content.values()))
|
||||||
assert loaded_content == test_content
|
assert loaded_content == test_content
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
@@ -82,7 +84,7 @@ class TestExternalKnowledgeDirectory:
|
|||||||
source = TextFileKnowledgeSource(file_paths=[str(test_file)])
|
source = TextFileKnowledgeSource(file_paths=[str(test_file)])
|
||||||
|
|
||||||
assert len(source.content) == 1
|
assert len(source.content) == 1
|
||||||
loaded_content = list(source.content.values())[0]
|
loaded_content = next(iter(source.content.values()))
|
||||||
assert loaded_content == test_content
|
assert loaded_content == test_content
|
||||||
|
|
||||||
finally:
|
finally:
|
||||||
|
|||||||
Reference in New Issue
Block a user