Merge branch 'main' into brandon/eng-266-conversation-crew-v1

2026-01-15 02:58:30 +00:00 · 2025-01-03 15:24:19 -05:00
parent 9e5d4972b9 30bd79390a
commit 104e8bc167
10 changed files with 596 additions and 205 deletions
--- a/src/crewai/knowledge/source/crew_docling_source.py
+++ b/src/crewai/knowledge/source/crew_docling_source.py
@@ -2,11 +2,16 @@ from pathlib import Path
 from typing import Iterator, List, Optional, Union
 from urllib.parse import urlparse

-from docling.datamodel.base_models import InputFormat
-from docling.document_converter import DocumentConverter
-from docling.exceptions import ConversionError
-from docling_core.transforms.chunker.hierarchical_chunker import HierarchicalChunker
-from docling_core.types.doc.document import DoclingDocument
+try:
+    from docling.datamodel.base_models import InputFormat
+    from docling.document_converter import DocumentConverter
+    from docling.exceptions import ConversionError
+    from docling_core.transforms.chunker.hierarchical_chunker import HierarchicalChunker
+    from docling_core.types.doc.document import DoclingDocument
+    DOCLING_AVAILABLE = True
+except ImportError:
+    DOCLING_AVAILABLE = False
+
 from pydantic import Field

 from crewai.knowledge.source.base_knowledge_source import BaseKnowledgeSource
@@ -19,6 +24,14 @@ class CrewDoclingSource(BaseKnowledgeSource):
    This will auto support PDF, DOCX, and TXT, XLSX, Images, and HTML files without any additional dependencies and follows the docling package as the source of truth.
    """

+    def __init__(self, *args, **kwargs):
+        if not DOCLING_AVAILABLE:
+            raise ImportError(
+                "The docling package is required to use CrewDoclingSource. "
+                "Please install it using: uv add docling"
+            )
+        super().__init__(*args, **kwargs)
+
    _logger: Logger = Logger(verbose=True)

    file_path: Optional[List[Union[Path, str]]] = Field(default=None)
--- a/src/crewai/project/crew_base.py
+++ b/src/crewai/project/crew_base.py
@@ -218,5 +218,5 @@ def CrewBase(cls: T) -> T:
    # Include base class (qual)name in the wrapper class (qual)name.
    WrappedClass.__name__ = CrewBase.__name__ + "(" + cls.__name__ + ")"
    WrappedClass.__qualname__ = CrewBase.__qualname__ + "(" + cls.__name__ + ")"
-  
+
    return cast(T, WrappedClass)
--- a/src/crewai/task.py
+++ b/src/crewai/task.py
@@ -136,6 +136,13 @@ class Task(BaseModel):
    )
    retry_count: int = Field(default=0, description="Current number of retries")

+    start_time: Optional[datetime.datetime] = Field(
+        default=None, description="Start time of the task execution"
+    )
+    end_time: Optional[datetime.datetime] = Field(
+        default=None, description="End time of the task execution"
+    )
+
    @field_validator("guardrail")
    @classmethod
    def validate_guardrail_function(cls, v: Optional[Callable]) -> Optional[Callable]:
@@ -184,7 +191,6 @@ class Task(BaseModel):
    _original_expected_output: Optional[str] = PrivateAttr(default=None)
    _original_output_file: Optional[str] = PrivateAttr(default=None)
    _thread: Optional[threading.Thread] = PrivateAttr(default=None)
-    _execution_time: Optional[float] = PrivateAttr(default=None)

    @model_validator(mode="before")
    @classmethod
@@ -209,25 +215,19 @@ class Task(BaseModel):
                "may_not_set_field", "This field is not to be set by the user.", {}
            )

-    def _set_start_execution_time(self) -> float:
-        return datetime.datetime.now().timestamp()
-
-    def _set_end_execution_time(self, start_time: float) -> None:
-        self._execution_time = datetime.datetime.now().timestamp() - start_time
-
    @field_validator("output_file")
    @classmethod
    def output_file_validation(cls, value: Optional[str]) -> Optional[str]:
        """Validate the output file path.
-        
+
        Args:
            value: The output file path to validate. Can be None or a string.
                  If the path contains template variables (e.g. {var}), leading slashes are preserved.
                  For regular paths, leading slashes are stripped.
-        
+
        Returns:
            The validated and potentially modified path, or None if no path was provided.
-            
+
        Raises:
            ValueError: If the path contains invalid characters, path traversal attempts,
                      or other security concerns.
@@ -237,18 +237,24 @@ class Task(BaseModel):

        # Basic security checks
        if ".." in value:
-            raise ValueError("Path traversal attempts are not allowed in output_file paths")
-        
+            raise ValueError(
+                "Path traversal attempts are not allowed in output_file paths"
+            )
+
        # Check for shell expansion first
-        if value.startswith('~') or value.startswith('$'):
-            raise ValueError("Shell expansion characters are not allowed in output_file paths")
-            
+        if value.startswith("~") or value.startswith("$"):
+            raise ValueError(
+                "Shell expansion characters are not allowed in output_file paths"
+            )
+
        # Then check other shell special characters
-        if any(char in value for char in ['|', '>', '<', '&', ';']):
-            raise ValueError("Shell special characters are not allowed in output_file paths")
+        if any(char in value for char in ["|", ">", "<", "&", ";"]):
+            raise ValueError(
+                "Shell special characters are not allowed in output_file paths"
+            )

        # Don't strip leading slash if it's a template path with variables
-        if "{" in value or "}" in value: 
+        if "{" in value or "}" in value:
            # Validate template variable format
            template_vars = [part.split("}")[0] for part in value.split("{")[1:]]
            for var in template_vars:
@@ -305,6 +311,12 @@ class Task(BaseModel):

        return md5("|".join(source).encode(), usedforsecurity=False).hexdigest()

+    @property
+    def execution_duration(self) -> float | None:
+        if not self.start_time or not self.end_time:
+            return None
+        return (self.end_time - self.start_time).total_seconds()
+
    def execute_async(
        self,
        agent: BaseAgent | None = None,
@@ -345,7 +357,7 @@ class Task(BaseModel):
                f"The task '{self.description}' has no agent assigned, therefore it can't be executed directly and should be executed in a Crew using a specific process that support that, like hierarchical."
            )

-        start_time = self._set_start_execution_time()
+        self.start_time = datetime.datetime.now()
        self._execution_span = self._telemetry.task_started(crew=agent.crew, task=self)

        self.prompt_context = context
@@ -404,8 +416,8 @@ class Task(BaseModel):
                task_output = guardrail_result.result

        self.output = task_output
+        self.end_time = datetime.datetime.now()

-        self._set_end_execution_time(start_time)
        if self.callback:
            self.callback(self.output)

@@ -417,7 +429,9 @@ class Task(BaseModel):
            content = (
                json_output
                if json_output
-                else pydantic_output.model_dump_json() if pydantic_output else result
+                else pydantic_output.model_dump_json()
+                if pydantic_output
+                else result
            )
            self._save_file(content)

@@ -445,7 +459,7 @@ class Task(BaseModel):
        Args:
            inputs: Dictionary mapping template variables to their values.
                   Supported value types are strings, integers, and floats.
-        
+
        Raises:
            ValueError: If a required template variable is missing from inputs.
        """
@@ -462,7 +476,9 @@ class Task(BaseModel):
        try:
            self.description = self._original_description.format(**inputs)
        except KeyError as e:
-            raise ValueError(f"Missing required template variable '{e.args[0]}' in description") from e
+            raise ValueError(
+                f"Missing required template variable '{e.args[0]}' in description"
+            ) from e
        except ValueError as e:
            raise ValueError(f"Error interpolating description: {str(e)}") from e

--- a/src/crewai/tools/tool_usage.py
+++ b/src/crewai/tools/tool_usage.py
@@ -169,7 +169,7 @@ class ToolUsage:

                if calling.arguments:
                    try:
-                        acceptable_args = tool.args_schema.schema()["properties"].keys()  # type: ignore # Item "None" of "type[BaseModel] | None" has no attribute "schema"
+                        acceptable_args = tool.args_schema.model_json_schema()["properties"].keys()  # type: ignore
                        arguments = {
                            k: v
                            for k, v in calling.arguments.items()
--- a/src/crewai/utilities/evaluators/crew_evaluator_handler.py
+++ b/src/crewai/utilities/evaluators/crew_evaluator_handler.py
@@ -180,12 +180,12 @@ class CrewEvaluator:
            self._test_result_span = self._telemetry.individual_test_result_span(
                self.crew,
                evaluation_result.pydantic.quality,
-                current_task._execution_time,
+                current_task.execution_duration,
                self.openai_model_name,
            )
            self.tasks_scores[self.iteration].append(evaluation_result.pydantic.quality)
            self.run_execution_times[self.iteration].append(
-                current_task._execution_time
+                current_task.execution_duration
            )
        else:
            raise ValueError("Evaluation result is not in the expected format")