replace print with logger

add debugging to Tool calls
feat: add pydantic validation dunder to BaseInterceptor
2025-12-16 20:38:29 +00:00 · 2025-11-06 20:45:00 -03:00 · 2025-11-06 17:47:32 -03:00 · 2025-11-06 15:27:07 -05:00 · 2025-11-06 10:59:52 -08:00
12 changed files with 1881 additions and 2250 deletions
--- a/lib/crewai-tools/src/crewai_tools/tools/crewai_platform_tools/crewai_platform_tool_builder.py
+++ b/lib/crewai-tools/src/crewai_tools/tools/crewai_platform_tools/crewai_platform_tool_builder.py
@@ -10,7 +10,7 @@ from crewai_tools.tools.crewai_platform_tools.misc import (
    get_platform_api_base_url,
    get_platform_integration_token,
 )
-
+from crewai.utilities.logger import Logger

 class CrewaiPlatformToolBuilder:
    def __init__(
@@ -20,6 +20,7 @@ class CrewaiPlatformToolBuilder:
        self._apps = apps
        self._actions_schema = {}  # type: ignore[var-annotated]
        self._tools = None
+        self._logger = Logger(verbose=True)

    def tools(self) -> list[BaseTool]:
        if self._tools is None:
@@ -30,6 +31,7 @@ class CrewaiPlatformToolBuilder:
    def _fetch_actions(self):
        actions_url = f"{get_platform_api_base_url()}/actions"
        headers = {"Authorization": f"Bearer {get_platform_integration_token()}"}
+        self._logger.log("info", f"Fetch actions: {actions_url}, {headers}, {','.join(self._apps)}")

        try:
            response = requests.get(
@@ -39,11 +41,13 @@ class CrewaiPlatformToolBuilder:
                params={"apps": ",".join(self._apps)},
            )
            response.raise_for_status()
-        except Exception:
+        except Exception as e:
+            self._logger.log("error", f"Error fetching actions: {e!s}")
            return

-        raw_data = response.json()

+        raw_data = response.json()
+        print("->>>>> raw_data:", raw_data)
        self._actions_schema = {}
        action_categories = raw_data.get("actions", {})

--- a/lib/crewai/src/crewai/agent/core.py
+++ b/lib/crewai/src/crewai/agent/core.py
@@ -640,6 +640,7 @@ class Agent(BaseAgent):

    def get_platform_tools(self, apps: list[PlatformAppOrAction]) -> list[BaseTool]:
        try:
+            self._logger.log("info", f"Start get_platform_tools: {apps}")
            from crewai_tools import (
                CrewaiPlatformTools,
            )
--- a/lib/crewai/src/crewai/llms/hooks/base.py
+++ b/lib/crewai/src/crewai/llms/hooks/base.py
@@ -7,7 +7,14 @@ outbound and inbound messages at the transport level.
 from __future__ import annotations

 from abc import ABC, abstractmethod
-from typing import Generic, TypeVar
+from typing import TYPE_CHECKING, Any, Generic, TypeVar
+
+from pydantic_core import core_schema
+
+
+if TYPE_CHECKING:
+    from pydantic import GetCoreSchemaHandler
+    from pydantic_core import CoreSchema


 T = TypeVar("T")
@@ -25,6 +32,7 @@ class BaseInterceptor(ABC, Generic[T, U]):
        U: Inbound message type (e.g., httpx.Response)

    Example:
+        >>> import httpx
        >>> class CustomInterceptor(BaseInterceptor[httpx.Request, httpx.Response]):
        ...     def on_outbound(self, message: httpx.Request) -> httpx.Request:
        ...         message.headers["X-Custom-Header"] = "value"
@@ -80,3 +88,46 @@ class BaseInterceptor(ABC, Generic[T, U]):
            Modified message object.
        """
        raise NotImplementedError
+
+    @classmethod
+    def __get_pydantic_core_schema__(
+        cls, _source_type: Any, _handler: GetCoreSchemaHandler
+    ) -> CoreSchema:
+        """Generate Pydantic core schema for BaseInterceptor.
+
+        This allows the generic BaseInterceptor to be used in Pydantic models
+        without requiring arbitrary_types_allowed=True. The schema validates
+        that the value is an instance of BaseInterceptor.
+
+        Args:
+            _source_type: The source type being validated (unused).
+            _handler: Handler for generating schemas (unused).
+
+        Returns:
+            A Pydantic core schema that validates BaseInterceptor instances.
+        """
+        return core_schema.no_info_plain_validator_function(
+            _validate_interceptor,
+            serialization=core_schema.plain_serializer_function_ser_schema(
+                lambda x: x, return_schema=core_schema.any_schema()
+            ),
+        )
+
+
+def _validate_interceptor(value: Any) -> BaseInterceptor[T, U]:
+    """Validate that the value is a BaseInterceptor instance.
+
+    Args:
+        value: The value to validate.
+
+    Returns:
+        The validated BaseInterceptor instance.
+
+    Raises:
+        ValueError: If the value is not a BaseInterceptor instance.
+    """
+    if not isinstance(value, BaseInterceptor):
+        raise ValueError(
+            f"Expected BaseInterceptor instance, got {type(value).__name__}"
+        )
+    return value
--- a/lib/crewai/src/crewai/llms/hooks/transport.py
+++ b/lib/crewai/src/crewai/llms/hooks/transport.py
@@ -6,16 +6,53 @@ to enable request/response modification at the transport level.

 from __future__ import annotations

-from typing import TYPE_CHECKING, Any
+from collections.abc import Iterable
+from typing import TYPE_CHECKING, TypedDict

-import httpx
+from httpx import (
+    AsyncHTTPTransport as _AsyncHTTPTransport,
+    HTTPTransport as _HTTPTransport,
+)
+from typing_extensions import NotRequired, Unpack


 if TYPE_CHECKING:
+    from ssl import SSLContext
+
+    from httpx import Limits, Request, Response
+    from httpx._types import CertTypes, ProxyTypes
+
    from crewai.llms.hooks.base import BaseInterceptor


-class HTTPTransport(httpx.HTTPTransport):
+class HTTPTransportKwargs(TypedDict):
+    """Typed dictionary for httpx.HTTPTransport initialization parameters.
+
+    These parameters configure the underlying HTTP transport behavior including
+    SSL verification, proxies, connection limits, and low-level socket options.
+    """
+
+    verify: bool | str | SSLContext
+    cert: NotRequired[CertTypes | None]
+    trust_env: bool
+    http1: bool
+    http2: bool
+    limits: Limits
+    proxy: NotRequired[ProxyTypes | None]
+    uds: NotRequired[str | None]
+    local_address: NotRequired[str | None]
+    retries: int
+    socket_options: NotRequired[
+        Iterable[
+            tuple[int, int, int]
+            | tuple[int, int, bytes | bytearray]
+            | tuple[int, int, None, int]
+        ]
+        | None
+    ]
+
+
+class HTTPTransport(_HTTPTransport):
    """HTTP transport that uses an interceptor for request/response modification.

    This transport is used internally when a user provides a BaseInterceptor.
@@ -25,19 +62,19 @@ class HTTPTransport(httpx.HTTPTransport):

    def __init__(
        self,
-        interceptor: BaseInterceptor[httpx.Request, httpx.Response],
-        **kwargs: Any,
+        interceptor: BaseInterceptor[Request, Response],
+        **kwargs: Unpack[HTTPTransportKwargs],
    ) -> None:
        """Initialize transport with interceptor.

        Args:
            interceptor: HTTP interceptor for modifying raw request/response objects.
-            **kwargs: Additional arguments passed to httpx.HTTPTransport.
+            **kwargs: HTTPTransport configuration parameters (verify, cert, proxy, etc.).
        """
        super().__init__(**kwargs)
        self.interceptor = interceptor

-    def handle_request(self, request: httpx.Request) -> httpx.Response:
+    def handle_request(self, request: Request) -> Response:
        """Handle request with interception.

        Args:
@@ -51,7 +88,7 @@ class HTTPTransport(httpx.HTTPTransport):
        return self.interceptor.on_inbound(response)


-class AsyncHTTPransport(httpx.AsyncHTTPTransport):
+class AsyncHTTPTransport(_AsyncHTTPTransport):
    """Async HTTP transport that uses an interceptor for request/response modification.

    This transport is used internally when a user provides a BaseInterceptor.
@@ -61,19 +98,19 @@ class AsyncHTTPransport(httpx.AsyncHTTPTransport):

    def __init__(
        self,
-        interceptor: BaseInterceptor[httpx.Request, httpx.Response],
-        **kwargs: Any,
+        interceptor: BaseInterceptor[Request, Response],
+        **kwargs: Unpack[HTTPTransportKwargs],
    ) -> None:
        """Initialize async transport with interceptor.

        Args:
            interceptor: HTTP interceptor for modifying raw request/response objects.
-            **kwargs: Additional arguments passed to httpx.AsyncHTTPTransport.
+            **kwargs: HTTPTransport configuration parameters (verify, cert, proxy, etc.).
        """
        super().__init__(**kwargs)
        self.interceptor = interceptor

-    async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
+    async def handle_async_request(self, request: Request) -> Response:
        """Handle async request with interception.

        Args:
--- a/lib/crewai/src/crewai/rag/chromadb/utils.py
+++ b/lib/crewai/src/crewai/rag/chromadb/utils.py
@@ -67,31 +67,44 @@ def _prepare_documents_for_chromadb(
    ids: list[str] = []
    texts: list[str] = []
    metadatas: list[Mapping[str, str | int | float | bool]] = []
+    seen_ids: dict[str, int] = {}
+
+    try:
+        for doc in documents:
+            if "doc_id" in doc:
+                doc_id = str(doc["doc_id"])
+            else:
+                metadata = doc.get("metadata")
+                if metadata and isinstance(metadata, dict) and "doc_id" in metadata:
+                    doc_id = str(metadata["doc_id"])
+                else:
+                    content_for_hash = doc["content"]
+                    if metadata:
+                        metadata_str = json.dumps(metadata, sort_keys=True)
+                        content_for_hash = f"{content_for_hash}|{metadata_str}"
+                    doc_id = hashlib.sha256(content_for_hash.encode()).hexdigest()

-    for doc in documents:
-        if "doc_id" in doc:
-            ids.append(doc["doc_id"])
-        else:
-            content_for_hash = doc["content"]
            metadata = doc.get("metadata")
            if metadata:
-                metadata_str = json.dumps(metadata, sort_keys=True)
-                content_for_hash = f"{content_for_hash}|{metadata_str}"
-
-            content_hash = hashlib.blake2b(
-                content_for_hash.encode(), digest_size=32
-            ).hexdigest()
-            ids.append(content_hash)
-
-        texts.append(doc["content"])
-        metadata = doc.get("metadata")
-        if metadata:
-            if isinstance(metadata, list):
-                metadatas.append(metadata[0] if metadata and metadata[0] else {})
+                if isinstance(metadata, list):
+                    processed_metadata = metadata[0] if metadata and metadata[0] else {}
+                else:
+                    processed_metadata = metadata
            else:
-                metadatas.append(metadata)
-        else:
-            metadatas.append({})
+                processed_metadata = {}
+
+            if doc_id in seen_ids:
+                idx = seen_ids[doc_id]
+                texts[idx] = doc["content"]
+                metadatas[idx] = processed_metadata
+            else:
+                idx = len(ids)
+                ids.append(doc_id)
+                texts.append(doc["content"])
+                metadatas.append(processed_metadata)
+                seen_ids[doc_id] = idx
+    except Exception as e:
+        raise ValueError(f"Error preparing documents for ChromaDB: {e}") from e

    return PreparedDocuments(ids, texts, metadatas)

--- a/lib/crewai/tests/agents/test_agent.py
+++ b/lib/crewai/tests/agents/test_agent.py
@@ -2117,15 +2117,14 @@ def test_agent_with_only_crewai_knowledge():
        goal="Provide information based on knowledge sources",
        backstory="You have access to specific knowledge sources.",
        llm=LLM(
-            model="openrouter/openai/gpt-4o-mini",
-            api_key=os.getenv("OPENROUTER_API_KEY"),
+            model="gpt-4o-mini",
        ),
    )

    # Create a task that requires the agent to use the knowledge
    task = Task(
        description="What is Vidit's favorite color?",
-        expected_output="Vidit's favorclearite color.",
+        expected_output="Vidit's favorite color.",
        agent=agent,
    )

--- a/lib/crewai/tests/cassettes/test_agent_execute_task_with_context.yaml
+++ b/lib/crewai/tests/cassettes/test_agent_execute_task_with_context.yaml
@@ -1,35 +1,128 @@
 interactions:
 - request:
-    body: '{"messages": [{"role": "system", "content": "You are test role. test backstory\nYour
+    body: '{"trace_id": "bf042234-54a3-4fc0-857d-1ae5585a174e", "execution_type":
+      "crew", "user_identifier": null, "execution_context": {"crew_fingerprint": null,
+      "crew_name": "Unknown Crew", "flow_name": null, "crewai_version": "1.3.0", "privacy_level":
+      "standard"}, "execution_metadata": {"expected_duration_estimate": 300, "agent_count":
+      0, "task_count": 0, "flow_method_count": 0, "execution_started_at": "2025-11-06T16:05:14.776800+00:00"}}'
+    headers:
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate, zstd
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '434'
+      Content-Type:
+      - application/json
+      User-Agent:
+      - CrewAI-CLI/1.3.0
+      X-Crewai-Version:
+      - 1.3.0
+    method: POST
+    uri: https://app.crewai.com/crewai_plus/api/v1/tracing/batches
+  response:
+    body:
+      string: '{"error":"bad_credentials","message":"Bad credentials"}'
+    headers:
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '55'
+      Content-Type:
+      - application/json; charset=utf-8
+      Date:
+      - Thu, 06 Nov 2025 16:05:15 GMT
+      cache-control:
+      - no-store
+      content-security-policy:
+      - 'default-src ''self'' *.app.crewai.com app.crewai.com; script-src ''self''
+        ''unsafe-inline'' *.app.crewai.com app.crewai.com https://cdn.jsdelivr.net/npm/apexcharts
+        https://www.gstatic.com https://run.pstmn.io https://apis.google.com https://apis.google.com/js/api.js
+        https://accounts.google.com https://accounts.google.com/gsi/client https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.1/normalize.min.css.map
+        https://*.google.com https://docs.google.com https://slides.google.com https://js.hs-scripts.com
+        https://js.sentry-cdn.com https://browser.sentry-cdn.com https://www.googletagmanager.com
+        https://js-na1.hs-scripts.com https://js.hubspot.com http://js-na1.hs-scripts.com
+        https://bat.bing.com https://cdn.amplitude.com https://cdn.segment.com https://d1d3n03t5zntha.cloudfront.net/
+        https://descriptusercontent.com https://edge.fullstory.com https://googleads.g.doubleclick.net
+        https://js.hs-analytics.net https://js.hs-banner.com https://js.hsadspixel.net
+        https://js.hscollectedforms.net https://js.usemessages.com https://snap.licdn.com
+        https://static.cloudflareinsights.com https://static.reo.dev https://www.google-analytics.com
+        https://share.descript.com/; style-src ''self'' ''unsafe-inline'' *.app.crewai.com
+        app.crewai.com https://cdn.jsdelivr.net/npm/apexcharts; img-src ''self'' data:
+        *.app.crewai.com app.crewai.com https://zeus.tools.crewai.com https://dashboard.tools.crewai.com
+        https://cdn.jsdelivr.net https://forms.hsforms.com https://track.hubspot.com
+        https://px.ads.linkedin.com https://px4.ads.linkedin.com https://www.google.com
+        https://www.google.com.br; font-src ''self'' data: *.app.crewai.com app.crewai.com;
+        connect-src ''self'' *.app.crewai.com app.crewai.com https://zeus.tools.crewai.com
+        https://connect.useparagon.com/ https://zeus.useparagon.com/* https://*.useparagon.com/*
+        https://run.pstmn.io https://connect.tools.crewai.com/ https://*.sentry.io
+        https://www.google-analytics.com https://edge.fullstory.com https://rs.fullstory.com
+        https://api.hubspot.com https://forms.hscollectedforms.net https://api.hubapi.com
+        https://px.ads.linkedin.com https://px4.ads.linkedin.com https://google.com/pagead/form-data/16713662509
+        https://google.com/ccm/form-data/16713662509 https://www.google.com/ccm/collect
+        https://worker-actionkit.tools.crewai.com https://api.reo.dev; frame-src ''self''
+        *.app.crewai.com app.crewai.com https://connect.useparagon.com/ https://zeus.tools.crewai.com
+        https://zeus.useparagon.com/* https://connect.tools.crewai.com/ https://docs.google.com
+        https://drive.google.com https://slides.google.com https://accounts.google.com
+        https://*.google.com https://app.hubspot.com/ https://td.doubleclick.net https://www.googletagmanager.com/
+        https://www.youtube.com https://share.descript.com'
+      expires:
+      - '0'
+      permissions-policy:
+      - camera=(), microphone=(self), geolocation=()
+      pragma:
+      - no-cache
+      referrer-policy:
+      - strict-origin-when-cross-origin
+      strict-transport-security:
+      - max-age=63072000; includeSubDomains
+      vary:
+      - Accept
+      x-content-type-options:
+      - nosniff
+      x-frame-options:
+      - SAMEORIGIN
+      x-permitted-cross-domain-policies:
+      - none
+      x-request-id:
+      - 9e528076-59a8-4c21-a999-2367937321ed
+      x-runtime:
+      - '0.070063'
+      x-xss-protection:
+      - 1; mode=block
+    status:
+      code: 401
+      message: Unauthorized
+- request:
+    body: '{"messages":[{"role":"system","content":"You are test role. test backstory\nYour
      personal goal is: test goal\nTo give my best complete final answer to the task
-      use the exact following format:\n\nThought: I now can give a great answer\nFinal
-      Answer: Your final answer must be the great and the most complete as possible,
-      it must be outcome described.\n\nI MUST use these formats, my job depends on
-      it!"}, {"role": "user", "content": "\nCurrent Task: Summarize the given context
-      in one sentence\n\nThis is the expect criteria for your final answer: A one-sentence
-      summary\nyou MUST return the actual complete content as the final answer, not
-      a summary.\n\nThis is the context you''re working with:\nThe quick brown fox
-      jumps over the lazy dog. This sentence contains every letter of the alphabet.\n\nBegin!
-      This is VERY important to you, use the tools available and give your best Final
-      Answer, your job depends on it!\n\nThought:"}], "model": "gpt-3.5-turbo"}'
+      respond using the exact following format:\n\nThought: I now can give a great
+      answer\nFinal Answer: Your final answer must be the great and the most complete
+      as possible, it must be outcome described.\n\nI MUST use these formats, my job
+      depends on it!"},{"role":"user","content":"\nCurrent Task: Summarize the given
+      context in one sentence\n\nThis is the expected criteria for your final answer:
+      A one-sentence summary\nyou MUST return the actual complete content as the final
+      answer, not a summary.\n\nThis is the context you''re working with:\nThe quick
+      brown fox jumps over the lazy dog. This sentence contains every letter of the
+      alphabet.\n\nBegin! This is VERY important to you, use the tools available and
+      give your best Final Answer, your job depends on it!\n\nThought:"}],"model":"gpt-3.5-turbo"}'
    headers:
      accept:
      - application/json
      accept-encoding:
-      - gzip, deflate
+      - gzip, deflate, zstd
      connection:
      - keep-alive
      content-length:
-      - '961'
+      - '963'
      content-type:
      - application/json
-      cookie:
-      - __cf_bm=rb61BZH2ejzD5YPmLaEJqI7km71QqyNJGTVdNxBq6qk-1727213194-1.0.1.1-pJ49onmgX9IugEMuYQMralzD7oj_6W.CHbSu4Su1z3NyjTGYg.rhgJZWng8feFYah._oSnoYlkTjpK1Wd2C9FA;
-        _cfuvid=lbRdAddVWV6W3f5Dm9SaOPWDUOxqtZBSPr_fTW26nEA-1727213194587-0.0.1.1-604800000
      host:
      - api.openai.com
      user-agent:
-      - OpenAI/Python 1.47.0
+      - OpenAI/Python 1.109.1
      x-stainless-arch:
      - arm64
      x-stainless-async:
@@ -39,30 +132,33 @@ interactions:
      x-stainless-os:
      - MacOS
      x-stainless-package-version:
-      - 1.47.0
-      x-stainless-raw-response:
-      - 'true'
+      - 1.109.1
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
      x-stainless-runtime:
      - CPython
      x-stainless-runtime-version:
-      - 3.11.7
+      - 3.12.9
    method: POST
    uri: https://api.openai.com/v1/chat/completions
  response:
-    content: "{\n  \"id\": \"chatcmpl-AB7WTXzhDaFVbUrrQKXCo78KID8N9\",\n  \"object\":
-      \"chat.completion\",\n  \"created\": 1727213889,\n  \"model\": \"gpt-3.5-turbo-0125\",\n
-      \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
-      \"assistant\",\n        \"content\": \"I now can give a great answer\\nFinal
-      Answer: The quick brown fox jumps over the lazy dog. This sentence contains
-      every letter of the alphabet.\",\n        \"refusal\": null\n      },\n      \"logprobs\":
-      null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
-      190,\n    \"completion_tokens\": 30,\n    \"total_tokens\": 220,\n    \"completion_tokens_details\":
-      {\n      \"reasoning_tokens\": 0\n    }\n  },\n  \"system_fingerprint\": null\n}\n"
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAAwAAAP//jFPBbtswDL37Kwidk6BOmgbLbRgwYLdtCLAVaxHIEm2rkUVVopOmRf59
+        kJLG6dYBuxgwH9/z4yP9UgAIo8UShGolq87b8afbXTfbr74/89erb2o/axY0f7x1RkX+8VOMEoOq
+        B1T8ypoo6rxFNuSOsAooGZNqubiZXl/Py3KegY402kRrPI9nk/mY+1DR+Kqczk/MlozCKJbwqwAA
+        eMnP5NFpfBJLuBq9VjqMUTYolucmABHIpoqQMZrI0rEYDaAix+iy7S/QO40htWjgFoFl3EB62Rlr
+        wQdSiBqYoDFbzB0VRobaOGlBurjDMLlzd+5zLnzMhSWsWoTH3qgNVIF2Dmp6goe+8xFoiyHLWPm8
+        B03NBFatiRAxeVIIyZw0LgJuMezBIjMGoDqTpPWtrJAnl+MErPsoU5yut/YCkM4Ry7SOHOT9CTmc
+        o7PU+EBV/IMqauNMbNcBZSSXYopMXmT0UADc5xX1b1IXPlDnec20wfy58kN51BPDVQzo7OYEMrG0
+        Q306XYze0VtrZGlsvFiyUFK1qAfqcBGy14YugOJi6r/dvKd9nNy45n/kB0Ap9Ix67QNqo95OPLQF
+        TD/Nv9rOKWfDImLYGoVrNhjSJjTWsrfHcxZxHxm7dW1cg8EHk286bbI4FL8BAAD//wMAHFSnRdID
+        AAA=
    headers:
-      CF-Cache-Status:
-      - DYNAMIC
      CF-RAY:
-      - 8c85eb7568111cf3-GRU
+      - 99a5d4d0bb8f7327-EWR
      Connection:
      - keep-alive
      Content-Encoding:
@@ -70,37 +166,54 @@ interactions:
      Content-Type:
      - application/json
      Date:
-      - Tue, 24 Sep 2024 21:38:09 GMT
+      - Thu, 06 Nov 2025 16:05:16 GMT
      Server:
      - cloudflare
+      Set-Cookie:
+      - __cf_bm=REDACTED;
+        path=/; expires=Thu, 06-Nov-25 16:35:16 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=REDACTED;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
      Transfer-Encoding:
      - chunked
      X-Content-Type-Options:
      - nosniff
      access-control-expose-headers:
      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
      openai-organization:
-      - crewai-iuxna1
+      - user-REDACTED
      openai-processing-ms:
-      - '662'
+      - '836'
+      openai-project:
+      - proj_REDACTED
      openai-version:
      - '2020-10-01'
-      strict-transport-security:
-      - max-age=31536000; includeSubDomains; preload
+      x-envoy-upstream-service-time:
+      - '983'
+      x-openai-proxy-wasm:
+      - v0.1
      x-ratelimit-limit-requests:
      - '10000'
      x-ratelimit-limit-tokens:
-      - '50000000'
+      - '200000'
      x-ratelimit-remaining-requests:
      - '9999'
      x-ratelimit-remaining-tokens:
-      - '49999772'
+      - '199785'
      x-ratelimit-reset-requests:
-      - 6ms
+      - 8.64s
      x-ratelimit-reset-tokens:
-      - 0s
+      - 64ms
      x-request-id:
-      - req_833406276d399714b624a32627fc5b4a
-    http_version: HTTP/1.1
-    status_code: 200
+      - req_c302b31f8f804399ae05fc424215303a
+    status:
+      code: 200
+      message: OK
 version: 1
--- a/lib/crewai/tests/cassettes/test_agent_respect_the_max_rpm_set.yaml
+++ b/lib/crewai/tests/cassettes/test_agent_respect_the_max_rpm_set.yaml
--- a/lib/crewai/tests/cassettes/test_agent_with_only_crewai_knowledge.yaml
+++ b/lib/crewai/tests/cassettes/test_agent_with_only_crewai_knowledge.yaml
@@ -1,54 +1,165 @@
 interactions:
 - request:
-    body: '{"model": "openai/gpt-4o-mini", "messages": [{"role": "system", "content":
-      "Your goal is to rewrite the user query so that it is optimized for retrieval
-      from a vector database. Consider how the query will be used to find relevant
-      documents, and aim to make it more specific and context-aware. \n\n Do not include
-      any other text than the rewritten query, especially any preamble or postamble
-      and only add expected output format if its relevant to the rewritten query.
-      \n\n Focus on the key words of the intended task and to retrieve the most relevant
-      information. \n\n There will be some extra context provided that might need
-      to be removed such as expected_output formats structured_outputs and other instructions."},
-      {"role": "user", "content": "The original query is: What is Vidit''s favorite
-      color?\n\nThis is the expected criteria for your final answer: Vidit''s favorclearite
-      color.\nyou MUST return the actual complete content as the final answer, not
-      a summary.."}], "stream": false, "stop": ["\nObservation:"]}'
+    body: '{"trace_id": "9d9d9d14-e5bc-44bc-8cfc-3df9ba4e6055", "execution_type":
+      "crew", "user_identifier": null, "execution_context": {"crew_fingerprint": null,
+      "crew_name": "crew", "flow_name": null, "crewai_version": "1.3.0", "privacy_level":
+      "standard"}, "execution_metadata": {"expected_duration_estimate": 300, "agent_count":
+      0, "task_count": 0, "flow_method_count": 0, "execution_started_at": "2025-11-06T15:58:15.778396+00:00"},
+      "ephemeral_trace_id": "9d9d9d14-e5bc-44bc-8cfc-3df9ba4e6055"}'
+    headers:
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate, zstd
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '488'
+      Content-Type:
+      - application/json
+      User-Agent:
+      - CrewAI-CLI/1.3.0
+      X-Crewai-Version:
+      - 1.3.0
+    method: POST
+    uri: https://app.crewai.com/crewai_plus/api/v1/tracing/ephemeral/batches
+  response:
+    body:
+      string: '{"id":"f303021e-f1a0-4fd8-9c7d-8ba6779f8ad3","ephemeral_trace_id":"9d9d9d14-e5bc-44bc-8cfc-3df9ba4e6055","execution_type":"crew","crew_name":"crew","flow_name":null,"status":"running","duration_ms":null,"crewai_version":"1.3.0","total_events":0,"execution_context":{"crew_fingerprint":null,"crew_name":"crew","flow_name":null,"crewai_version":"1.3.0","privacy_level":"standard"},"created_at":"2025-11-06T15:58:16.189Z","updated_at":"2025-11-06T15:58:16.189Z","access_code":"TRACE-c2990cd4d4","user_identifier":null}'
+    headers:
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '515'
+      Content-Type:
+      - application/json; charset=utf-8
+      Date:
+      - Thu, 06 Nov 2025 15:58:16 GMT
+      cache-control:
+      - no-store
+      content-security-policy:
+      - 'default-src ''self'' *.app.crewai.com app.crewai.com; script-src ''self''
+        ''unsafe-inline'' *.app.crewai.com app.crewai.com https://cdn.jsdelivr.net/npm/apexcharts
+        https://www.gstatic.com https://run.pstmn.io https://apis.google.com https://apis.google.com/js/api.js
+        https://accounts.google.com https://accounts.google.com/gsi/client https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.1/normalize.min.css.map
+        https://*.google.com https://docs.google.com https://slides.google.com https://js.hs-scripts.com
+        https://js.sentry-cdn.com https://browser.sentry-cdn.com https://www.googletagmanager.com
+        https://js-na1.hs-scripts.com https://js.hubspot.com http://js-na1.hs-scripts.com
+        https://bat.bing.com https://cdn.amplitude.com https://cdn.segment.com https://d1d3n03t5zntha.cloudfront.net/
+        https://descriptusercontent.com https://edge.fullstory.com https://googleads.g.doubleclick.net
+        https://js.hs-analytics.net https://js.hs-banner.com https://js.hsadspixel.net
+        https://js.hscollectedforms.net https://js.usemessages.com https://snap.licdn.com
+        https://static.cloudflareinsights.com https://static.reo.dev https://www.google-analytics.com
+        https://share.descript.com/; style-src ''self'' ''unsafe-inline'' *.app.crewai.com
+        app.crewai.com https://cdn.jsdelivr.net/npm/apexcharts; img-src ''self'' data:
+        *.app.crewai.com app.crewai.com https://zeus.tools.crewai.com https://dashboard.tools.crewai.com
+        https://cdn.jsdelivr.net https://forms.hsforms.com https://track.hubspot.com
+        https://px.ads.linkedin.com https://px4.ads.linkedin.com https://www.google.com
+        https://www.google.com.br; font-src ''self'' data: *.app.crewai.com app.crewai.com;
+        connect-src ''self'' *.app.crewai.com app.crewai.com https://zeus.tools.crewai.com
+        https://connect.useparagon.com/ https://zeus.useparagon.com/* https://*.useparagon.com/*
+        https://run.pstmn.io https://connect.tools.crewai.com/ https://*.sentry.io
+        https://www.google-analytics.com https://edge.fullstory.com https://rs.fullstory.com
+        https://api.hubspot.com https://forms.hscollectedforms.net https://api.hubapi.com
+        https://px.ads.linkedin.com https://px4.ads.linkedin.com https://google.com/pagead/form-data/16713662509
+        https://google.com/ccm/form-data/16713662509 https://www.google.com/ccm/collect
+        https://worker-actionkit.tools.crewai.com https://api.reo.dev; frame-src ''self''
+        *.app.crewai.com app.crewai.com https://connect.useparagon.com/ https://zeus.tools.crewai.com
+        https://zeus.useparagon.com/* https://connect.tools.crewai.com/ https://docs.google.com
+        https://drive.google.com https://slides.google.com https://accounts.google.com
+        https://*.google.com https://app.hubspot.com/ https://td.doubleclick.net https://www.googletagmanager.com/
+        https://www.youtube.com https://share.descript.com'
+      etag:
+      - W/"8df0b730688b8bc094b74c66a6293578"
+      expires:
+      - '0'
+      permissions-policy:
+      - camera=(), microphone=(self), geolocation=()
+      pragma:
+      - no-cache
+      referrer-policy:
+      - strict-origin-when-cross-origin
+      strict-transport-security:
+      - max-age=63072000; includeSubDomains
+      vary:
+      - Accept
+      x-content-type-options:
+      - nosniff
+      x-frame-options:
+      - SAMEORIGIN
+      x-permitted-cross-domain-policies:
+      - none
+      x-request-id:
+      - 38352441-7508-4e1e-9bff-77d1689dffdf
+      x-runtime:
+      - '0.085540'
+      x-xss-protection:
+      - 1; mode=block
+    status:
+      code: 201
+      message: Created
+- request:
+    body: '{"messages":[{"role":"system","content":"Your goal is to rewrite the user
+      query so that it is optimized for retrieval from a vector database. Consider
+      how the query will be used to find relevant documents, and aim to make it more
+      specific and context-aware. \n\n Do not include any other text than the rewritten
+      query, especially any preamble or postamble and only add expected output format
+      if its relevant to the rewritten query. \n\n Focus on the key words of the intended
+      task and to retrieve the most relevant information. \n\n There will be some
+      extra context provided that might need to be removed such as expected_output
+      formats structured_outputs and other instructions."},{"role":"user","content":"The
+      original query is: What is Vidit''s favorite color?\n\nThis is the expected
+      criteria for your final answer: Vidit''s favorite color.\nyou MUST return the
+      actual complete content as the final answer, not a summary.."}],"model":"gpt-4o-mini"}'
    headers:
      accept:
-      - '*/*'
+      - application/json
      accept-encoding:
-      - gzip, deflate
+      - gzip, deflate, zstd
      connection:
      - keep-alive
      content-length:
-      - '1017'
+      - '950'
      content-type:
      - application/json
      host:
-      - openrouter.ai
-      http-referer:
-      - https://litellm.ai
+      - api.openai.com
      user-agent:
-      - litellm/1.68.0
-      x-title:
-      - liteLLM
+      - OpenAI/Python 1.109.1
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.109.1
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.9
    method: POST
-    uri: https://openrouter.ai/api/v1/chat/completions
+    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: !!binary |
-        H4sIAAAAAAAAAwAAAP//4lKAAS4AAAAA//90kE1PIzEMhv8Kei97Sdnplwq5gTgAF8ShcFitRmnG
-        nTFk4ihxq11V899Xs6gFJLja78djH8ANLFqKk+lqsZpP56vpYqJhublfP1eP65v1i79Lt9fdMwxS
-        lj03lGHxkChe3cGgl4YCLCRRdPyzTTpZyKTnyDCQzQt5hYXvnJ576VMgZYkw8JmcUgP7XmvgO2FP
-        BfbXAUHalGVTYOMuBIMtRy5dnckVibAoKgkG0Snvqf5my7GhP7CVQU+luJZgD8gSCBauFC7qoo40
-        EpXiSPrEDeuPcrZ1e8msdOYlSIZBpu2uuHDEeWvi2L4NhuG3QflblPqRpaWcMv8P3Ka6ml/OLmaz
-        6rKCwe6IkbL0SWuVV4pl/MNy5Di+6DRfGqioC+/Ci8p8NtcNqeNQxlTvfEfNSVwNX4R+1J/u+GAZ
-        hn8AAAD//wMAIwJ79CICAAA=
+        H4sIAAAAAAAAAwAAAP//jFJBbtswELzrFQQvvViBLMuy42sObYEWKIoiQFMEAkOu5G0oLkGu0xaB
+        /15QciwlTYFceODsDGeG+5gJIdHInZB6r1j33uZX33+1H78y9tvVt+Lmpv68KrfXX96Xnz7cu61c
+        JAbd/QTNT6wLTb23wEhuhHUAxZBUl5u6rKqqvqwHoCcDNtE6z3lFeY8O87Ioq7zY5MuTuN4Taohy
+        J35kQgjxOJzJpzPwW+5EsXi66SFG1YHcnYeEkIFsupEqRoysHMvFBGpyDG6wfo0G+V0UrXqggAxC
+        k6VwMZ8O0B6iSo7dwdoZoJwjVinx4PP2hBzPzix1PtBdfEGVLTqM+yaAiuSSi8jk5YAeMyFuhwYO
+        z0JJH6j33DDdw/DccrMa9eRU/ISuTxgTKzsnbRevyDUGWKGNswqlVnoPZqJOfauDQZoB2Sz0v2Ze
+        0x6Do+veIj8BWoNnMI0PYFA/DzyNBUhr+b+xc8mDYRkhPKCGhhFC+ggDrTrYcVlk/BMZ+qZF10Hw
+        AceNaX2zrgvV1rBeX8rsmP0FAAD//wMA5SIzeT8DAAA=
    headers:
-      Access-Control-Allow-Origin:
-      - '*'
      CF-RAY:
-      - 9402c9db99ec4722-BOM
+      - 99a5ca96bb1443e7-EWR
      Connection:
      - keep-alive
      Content-Encoding:
@@ -56,73 +167,122 @@ interactions:
      Content-Type:
      - application/json
      Date:
-      - Thu, 15 May 2025 12:55:14 GMT
+      - Thu, 06 Nov 2025 15:58:16 GMT
      Server:
      - cloudflare
+      Set-Cookie:
+      - __cf_bm=REDACTED;
+        path=/; expires=Thu, 06-Nov-25 16:28:16 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=REDACTED;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
      Transfer-Encoding:
      - chunked
-      Vary:
-      - Accept-Encoding
-      x-clerk-auth-message:
-      - Invalid JWT form. A JWT consists of three parts separated by dots. (reason=token-invalid,
-        token-carrier=header)
-      x-clerk-auth-reason:
-      - token-invalid
-      x-clerk-auth-status:
-      - signed-out
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - user-REDACTED
+      openai-processing-ms:
+      - '235'
+      openai-project:
+      - proj_REDACTED
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '420'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '10000'
+      x-ratelimit-limit-tokens:
+      - '200000'
+      x-ratelimit-remaining-requests:
+      - '9999'
+      x-ratelimit-remaining-tokens:
+      - '199785'
+      x-ratelimit-reset-requests:
+      - 8.64s
+      x-ratelimit-reset-tokens:
+      - 64ms
+      x-request-id:
+      - req_9810e9721aa9463c930414ab5174ab61
    status:
      code: 200
      message: OK
 - request:
-    body: '{"model": "openai/gpt-4o-mini", "messages": [{"role": "system", "content":
-      "You are Information Agent. You have access to specific knowledge sources.\nYour
-      personal goal is: Provide information based on knowledge sources\nTo give my
-      best complete final answer to the task respond using the exact following format:\n\nThought:
-      I now can give a great answer\nFinal Answer: Your final answer must be the great
-      and the most complete as possible, it must be outcome described.\n\nI MUST use
-      these formats, my job depends on it!"}, {"role": "user", "content": "\nCurrent
-      Task: What is Vidit''s favorite color?\n\nThis is the expected criteria for
-      your final answer: Vidit''s favorclearite color.\nyou MUST return the actual
-      complete content as the final answer, not a summary.\n\nBegin! This is VERY
-      important to you, use the tools available and give your best Final Answer, your
-      job depends on it!\n\nThought:"}], "stream": false, "stop": ["\nObservation:"]}'
+    body: '{"messages":[{"role":"system","content":"You are Information Agent. You
+      have access to specific knowledge sources.\nYour personal goal is: Provide information
+      based on knowledge sources\nTo give my best complete final answer to the task
+      respond using the exact following format:\n\nThought: I now can give a great
+      answer\nFinal Answer: Your final answer must be the great and the most complete
+      as possible, it must be outcome described.\n\nI MUST use these formats, my job
+      depends on it!"},{"role":"user","content":"\nCurrent Task: What is Vidit''s
+      favorite color?\n\nThis is the expected criteria for your final answer: Vidit''s
+      favorite color.\nyou MUST return the actual complete content as the final answer,
+      not a summary.\n\nBegin! This is VERY important to you, use the tools available
+      and give your best Final Answer, your job depends on it!\n\nThought:"}],"model":"gpt-4o-mini"}'
    headers:
      accept:
-      - '*/*'
+      - application/json
      accept-encoding:
-      - gzip, deflate
+      - gzip, deflate, zstd
      connection:
      - keep-alive
      content-length:
-      - '951'
+      - '884'
      content-type:
      - application/json
+      cookie:
+      - __cf_bm=REDACTED;
+        _cfuvid=REDACTED
      host:
-      - openrouter.ai
-      http-referer:
-      - https://litellm.ai
+      - api.openai.com
      user-agent:
-      - litellm/1.68.0
-      x-title:
-      - liteLLM
+      - OpenAI/Python 1.109.1
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.109.1
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.9
    method: POST
-    uri: https://openrouter.ai/api/v1/chat/completions
+    uri: https://api.openai.com/v1/chat/completions
  response:
    body:
      string: !!binary |
-        H4sIAAAAAAAAAwAAAP//4lKAAS4AAAAA///iQjABAAAA//90kN1qGzEQRl9FfNdyul4nday73ARy
-        VUpLE2jLIu+O15NoZ4QkOy1moa/R1+uTlE1wnEB7qU/zc84cwB0cepLZfHm+XMwXy/nF7II/3d7V
-        H+tOPvsS3le3d+keFjHpnjtKcPgQSa5uYDFoRwEOGkk8v+tjmZ3rbGBhWOj6ntoCh3bry1mrQwxU
-        WAUWbSJfqIM7rbVot8otZbivBwTtY9J1hpNdCBYbFs7bJpHPKnDIRSMsxBfeU/OfX5aOfsBVFgPl
-        7HuCOyBpIDj4nDkXL2WiUSkkE+mNEX00rRfT856MN/0EarzkR0rGfJNrFh/M1dPbmS/ccfnz63c2
-        G7/XxIVMq0GT4WzWYUdnsEi02WUfjiLPjCz9czCO3y3yz1xomCx6SjHxE8omNtViVV/WdbWqYLE7
-        CsSkQyxN0QeSPF2wmgyOxz3lK4uixYdTcrmyb7ubjornkKexrW+31L0UV+M/pr6ufxF51TKOfwEA
-        AP//AwBybekMaAIAAA==
+        H4sIAAAAAAAAAwAAAP//jFPBahsxEL37KwZderGN7dqO41vaUghtT4FCacIiS7PrSbQaVZq1swT/
+        e9HayTptCr0ING/e6M2b0dMAQJFVa1Bmq8XUwY0+/tiXXy4/2BDN/p7izdY4/vrp29Wvmxbv1TAz
+        eHOPRp5ZY8N1cCjE/gibiFowV51eLGfz+Xx5ueqAmi26TKuCjOY8qsnTaDaZzUeTi9F0dWJvmQwm
+        tYafAwCAp+7MOr3FR7WGyfA5UmNKukK1fkkCUJFdjiidEiXRXtSwBw17Qd9JvwbPezDaQ0U7BA1V
+        lg3apz1GgFv/mbx2cNXd1/CdLMm7BKXecSRBMOw4AiXwLBCajSPjWrBsmhq9oAWOsCeLroUHz3s/
+        husSWm5gq3cIKaChkgx0ih4lZ1sUTS6B3nAjxweHcA21bmGDoDcOQRhC5B3ZLLjmiJApHNFCxBTY
+        Jxyf9xuxbJLOnvvGuTNAe8+i88w6p+9OyOHFW8dViLxJf1BVSZ7StoioE/vsYxIOqkMPA4C7bobN
+        q7GoELkOUgg/YPfcdLk61lP96vTofHEChUW7Pj6bvh++Ua842Xa2Bcpos0XbU/uV0Y0lPgMGZ13/
+        reat2sfOyVf/U74HjMEgaIsQ0ZJ53XGfFjH/rH+lvbjcCVYJ444MFkIY8yQslrpxx31XqU2CdVGS
+        rzCGSMelL0OxWE50ucTF4lINDoPfAAAA//8DAPFGfbMCBAAA
    headers:
-      Access-Control-Allow-Origin:
-      - '*'
      CF-RAY:
-      - 9402c9e1b94a4722-BOM
+      - 99a5ca9c5ef543e7-EWR
      Connection:
      - keep-alive
      Content-Encoding:
@@ -130,20 +290,47 @@ interactions:
      Content-Type:
      - application/json
      Date:
-      - Thu, 15 May 2025 12:55:15 GMT
+      - Thu, 06 Nov 2025 15:58:19 GMT
      Server:
      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
      Transfer-Encoding:
      - chunked
-      Vary:
-      - Accept-Encoding
-      x-clerk-auth-message:
-      - Invalid JWT form. A JWT consists of three parts separated by dots. (reason=token-invalid,
-        token-carrier=header)
-      x-clerk-auth-reason:
-      - token-invalid
-      x-clerk-auth-status:
-      - signed-out
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - user-REDACTED
+      openai-processing-ms:
+      - '1326'
+      openai-project:
+      - proj_REDACTED
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '1754'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '10000'
+      x-ratelimit-limit-tokens:
+      - '200000'
+      x-ratelimit-remaining-requests:
+      - '9998'
+      x-ratelimit-remaining-tokens:
+      - '199803'
+      x-ratelimit-reset-requests:
+      - 15.913s
+      x-ratelimit-reset-tokens:
+      - 59ms
+      x-request-id:
+      - req_f975e16b666e498b8bcfdfab525f71b3
    status:
      code: 200
      message: OK
--- a/lib/crewai/tests/cassettes/test_tool_usage_information_is_appended_to_agent.yaml
+++ b/lib/crewai/tests/cassettes/test_tool_usage_information_is_appended_to_agent.yaml
--- a/lib/crewai/tests/knowledge/test_knowledge.py
+++ b/lib/crewai/tests/knowledge/test_knowledge.py
@@ -601,3 +601,81 @@ def test_file_path_validation():
        match="file_path/file_paths must be a Path, str, or a list of these types",
    ):
        PDFKnowledgeSource()
+
+
+def test_hash_based_id_generation_without_doc_id(mock_vector_db):
+    """Test that documents without doc_id generate hash-based IDs. Duplicates are deduplicated before upsert."""
+    import hashlib
+    import json
+    from crewai.rag.chromadb.utils import _prepare_documents_for_chromadb
+    from crewai.rag.types import BaseRecord
+
+    documents: list[BaseRecord] = [
+        {"content": "First document content", "metadata": {"source": "test1", "category": "research"}},
+        {"content": "Second document content", "metadata": {"source": "test2", "category": "research"}},
+        {"content": "Third document content"},  # No metadata
+    ]
+
+    result = _prepare_documents_for_chromadb(documents)
+
+    assert len(result.ids) == 3
+
+    # Unique documents should get 64-character hex hashes (no suffix)
+    for doc_id in result.ids:
+        assert len(doc_id) == 64, f"ID should be 64 characters: {doc_id}"
+        assert all(c in "0123456789abcdef" for c in doc_id), f"ID should be hex: {doc_id}"
+
+    # All IDs must be pairwise distinct (a chained `a != b != c` never compares a with c)
+    assert len(set(result.ids)) == len(result.ids)
+
+    # Verify hashes match expected values
+    expected_hash_1 = hashlib.sha256(
+        f"First document content|{json.dumps({'category': 'research', 'source': 'test1'}, sort_keys=True)}".encode()
+    ).hexdigest()
+    assert result.ids[0] == expected_hash_1, "First document hash should match expected"
+
+    expected_hash_3 = hashlib.sha256("Third document content".encode()).hexdigest()
+    assert result.ids[2] == expected_hash_3, "Third document hash should match expected"
+
+    # Test that duplicate documents are deduplicated (same ID, only one sent)
+    duplicate_documents: list[BaseRecord] = [
+        {"content": "Same content", "metadata": {"source": "test"}},
+        {"content": "Same content", "metadata": {"source": "test"}},
+        {"content": "Same content", "metadata": {"source": "test"}},
+    ]
+    duplicate_result = _prepare_documents_for_chromadb(duplicate_documents)
+    # Duplicates should be deduplicated - only one ID should remain
+    assert len(duplicate_result.ids) == 1, "Duplicate documents should be deduplicated"
+    assert len(duplicate_result.ids[0]) == 64, "Deduplicated ID should be clean hash"
+    # Verify it's the expected hash
+    expected_hash = hashlib.sha256(
+        f"Same content|{json.dumps({'source': 'test'}, sort_keys=True)}".encode()
+    ).hexdigest()
+    assert duplicate_result.ids[0] == expected_hash, "Deduplicated ID should match expected hash"
+
+
+def test_hash_based_id_generation_with_doc_id_in_metadata(mock_vector_db):
+    """Test that documents with doc_id in metadata use that doc_id directly instead of a hash-based ID."""
+    from crewai.rag.chromadb.utils import _prepare_documents_for_chromadb
+    from crewai.rag.types import BaseRecord
+
+    documents_with_doc_id: list[BaseRecord] = [
+        {"content": "First document", "metadata": {"doc_id": "custom-id-1", "source": "test1"}},
+        {"content": "Second document", "metadata": {"doc_id": "custom-id-2"}},
+    ]
+
+    documents_without_doc_id: list[BaseRecord] = [
+        {"content": "First document", "metadata": {"source": "test1"}},
+        {"content": "Second document"},
+    ]
+
+    result_with_doc_id = _prepare_documents_for_chromadb(documents_with_doc_id)
+    result_without_doc_id = _prepare_documents_for_chromadb(documents_without_doc_id)
+
+    assert result_with_doc_id.ids == ["custom-id-1", "custom-id-2"]
+
+    assert len(result_without_doc_id.ids) == 2
+    # Documents without a doc_id should get 64-character lowercase-hex IDs
+    for doc_id in result_without_doc_id.ids:
+        assert len(doc_id) == 64, "ID should be 64 characters"
+        assert all(c in "0123456789abcdef" for c in doc_id), "ID should be hex"
--- a/lib/crewai/tests/llms/hooks/test_transport.py
+++ b/lib/crewai/tests/llms/hooks/test_transport.py
@@ -6,7 +6,7 @@ import httpx
 import pytest

 from crewai.llms.hooks.base import BaseInterceptor
-from crewai.llms.hooks.transport import AsyncHTTPransport, HTTPTransport
+from crewai.llms.hooks.transport import AsyncHTTPTransport, HTTPTransport


 class TrackingInterceptor(BaseInterceptor[httpx.Request, httpx.Response]):
@@ -128,7 +128,7 @@ class TestAsyncHTTPTransport:
    def test_async_transport_instantiation(self) -> None:
        """Test that async transport can be instantiated with interceptor."""
        interceptor = TrackingInterceptor()
-        transport = AsyncHTTPransport(interceptor=interceptor)
+        transport = AsyncHTTPTransport(interceptor=interceptor)

        assert transport.interceptor is interceptor

@@ -136,13 +136,13 @@ class TestAsyncHTTPTransport:
        """Test that async transport requires interceptor parameter."""
         # AsyncHTTPTransport requires an interceptor parameter
        with pytest.raises(TypeError):
-            AsyncHTTPransport()
+            AsyncHTTPTransport()

    @pytest.mark.asyncio
    async def test_async_interceptor_called_on_request(self) -> None:
        """Test that async interceptor hooks are called during request handling."""
        interceptor = TrackingInterceptor()
-        transport = AsyncHTTPransport(interceptor=interceptor)
+        transport = AsyncHTTPTransport(interceptor=interceptor)

        # Create a mock parent transport that returns a response
        mock_response = httpx.Response(200, json={"success": True})
@@ -217,7 +217,7 @@ class TestTransportIntegration:
    async def test_multiple_async_requests_same_interceptor(self) -> None:
        """Test that multiple async requests through same interceptor are tracked."""
        interceptor = TrackingInterceptor()
-        transport = AsyncHTTPransport(interceptor=interceptor)
+        transport = AsyncHTTPTransport(interceptor=interceptor)

        mock_response = httpx.Response(200)
Author	SHA1	Message	Date
Lucas Gomide	fed3196ffe	replace print by logger	2025-11-06 20:45:00 -03:00
Lucas Gomide	2997e6c3ad	add debuging to Tool calls	2025-11-06 17:47:32 -03:00
Greyson LaLonde	9e5906c52f	feat: add pydantic validation dunder to BaseInterceptor Some checks failed CodeQL Advanced / Analyze (actions) (push) Has been cancelled Details CodeQL Advanced / Analyze (python) (push) Has been cancelled Details Notify Downstream / notify-downstream (push) Has been cancelled Details	2025-11-06 15:27:07 -05:00
Lorenze Jay	fc521839e4	Lorenze/fix duplicating doc ids for knowledge (#3840 ) * fix: update document ID handling in ChromaDB utility functions to use SHA-256 hashing and include index for uniqueness * test: add tests for hash-based ID generation in ChromaDB utility functions * drop idx for preventing dups, upsert should handle dups * fix: update document ID extraction logic in ChromaDB utility functions to check for doc_id at the top level of the document * fix: enhance document ID generation in ChromaDB utility functions to deduplicate documents and ensure unique hash-based IDs without suffixes * fix: improve error handling and document ID generation in ChromaDB utility functions to ensure robust processing and uniqueness	2025-11-06 10:59:52 -08:00