Compare commits

...

4 Commits

Author SHA1 Message Date
Lucas Gomide
fed3196ffe replace print by logger 2025-11-06 20:45:00 -03:00
Lucas Gomide
2997e6c3ad add debuging to Tool calls 2025-11-06 17:47:32 -03:00
Greyson LaLonde
9e5906c52f feat: add pydantic validation dunder to BaseInterceptor
Some checks failed
CodeQL Advanced / Analyze (actions) (push) Has been cancelled
CodeQL Advanced / Analyze (python) (push) Has been cancelled
Notify Downstream / notify-downstream (push) Has been cancelled
2025-11-06 15:27:07 -05:00
Lorenze Jay
fc521839e4 Lorenze/fix duplicating doc ids for knowledge (#3840)
* fix: update document ID handling in ChromaDB utility functions to use SHA-256 hashing and include index for uniqueness

* test: add tests for hash-based ID generation in ChromaDB utility functions

* drop idx for preventing dups, upsert should handle dups

* fix: update document ID extraction logic in ChromaDB utility functions to check for doc_id at the top level of the document

* fix: enhance document ID generation in ChromaDB utility functions to deduplicate documents and ensure unique hash-based IDs without suffixes

* fix: improve error handling and document ID generation in ChromaDB utility functions to ensure robust processing and uniqueness
2025-11-06 10:59:52 -08:00
12 changed files with 1881 additions and 2250 deletions

View File

@@ -10,7 +10,7 @@ from crewai_tools.tools.crewai_platform_tools.misc import (
get_platform_api_base_url,
get_platform_integration_token,
)
from crewai.utilities.logger import Logger
class CrewaiPlatformToolBuilder:
def __init__(
@@ -20,6 +20,7 @@ class CrewaiPlatformToolBuilder:
self._apps = apps
self._actions_schema = {} # type: ignore[var-annotated]
self._tools = None
self._logger = Logger(verbose=True)
def tools(self) -> list[BaseTool]:
if self._tools is None:
@@ -30,6 +31,7 @@ class CrewaiPlatformToolBuilder:
def _fetch_actions(self):
actions_url = f"{get_platform_api_base_url()}/actions"
headers = {"Authorization": f"Bearer {get_platform_integration_token()}"}
self._logger.log("info", f"Fetch actions: {actions_url}, {headers}, {','.join(self._apps)}")
try:
response = requests.get(
@@ -39,11 +41,13 @@ class CrewaiPlatformToolBuilder:
params={"apps": ",".join(self._apps)},
)
response.raise_for_status()
except Exception:
except Exception as e:
self._logger.log("error", f"Error fetching actions: {e!s}")
return
raw_data = response.json()
raw_data = response.json()
print("->>>>> raw_data:", raw_data)
self._actions_schema = {}
action_categories = raw_data.get("actions", {})

View File

@@ -640,6 +640,7 @@ class Agent(BaseAgent):
def get_platform_tools(self, apps: list[PlatformAppOrAction]) -> list[BaseTool]:
try:
self._logger.log("info", f"Start get_platform_tools: {apps}")
from crewai_tools import (
CrewaiPlatformTools,
)

View File

@@ -7,7 +7,14 @@ outbound and inbound messages at the transport level.
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Generic, TypeVar
from typing import TYPE_CHECKING, Any, Generic, TypeVar
from pydantic_core import core_schema
if TYPE_CHECKING:
from pydantic import GetCoreSchemaHandler
from pydantic_core import CoreSchema
T = TypeVar("T")
@@ -25,6 +32,7 @@ class BaseInterceptor(ABC, Generic[T, U]):
U: Inbound message type (e.g., httpx.Response)
Example:
>>> import httpx
>>> class CustomInterceptor(BaseInterceptor[httpx.Request, httpx.Response]):
... def on_outbound(self, message: httpx.Request) -> httpx.Request:
... message.headers["X-Custom-Header"] = "value"
@@ -80,3 +88,46 @@ class BaseInterceptor(ABC, Generic[T, U]):
Modified message object.
"""
raise NotImplementedError
@classmethod
def __get_pydantic_core_schema__(
cls, _source_type: Any, _handler: GetCoreSchemaHandler
) -> CoreSchema:
"""Generate Pydantic core schema for BaseInterceptor.
This allows the generic BaseInterceptor to be used in Pydantic models
without requiring arbitrary_types_allowed=True. The schema validates
that the value is an instance of BaseInterceptor.
Args:
_source_type: The source type being validated (unused).
_handler: Handler for generating schemas (unused).
Returns:
A Pydantic core schema that validates BaseInterceptor instances.
"""
return core_schema.no_info_plain_validator_function(
_validate_interceptor,
serialization=core_schema.plain_serializer_function_ser_schema(
lambda x: x, return_schema=core_schema.any_schema()
),
)
def _validate_interceptor(value: Any) -> BaseInterceptor[T, U]:
"""Validate that the value is a BaseInterceptor instance.
Args:
value: The value to validate.
Returns:
The validated BaseInterceptor instance.
Raises:
ValueError: If the value is not a BaseInterceptor instance.
"""
if not isinstance(value, BaseInterceptor):
raise ValueError(
f"Expected BaseInterceptor instance, got {type(value).__name__}"
)
return value

View File

@@ -6,16 +6,53 @@ to enable request/response modification at the transport level.
from __future__ import annotations
from typing import TYPE_CHECKING, Any
from collections.abc import Iterable
from typing import TYPE_CHECKING, TypedDict
import httpx
from httpx import (
AsyncHTTPTransport as _AsyncHTTPTransport,
HTTPTransport as _HTTPTransport,
)
from typing_extensions import NotRequired, Unpack
if TYPE_CHECKING:
from ssl import SSLContext
from httpx import Limits, Request, Response
from httpx._types import CertTypes, ProxyTypes
from crewai.llms.hooks.base import BaseInterceptor
class HTTPTransport(httpx.HTTPTransport):
class HTTPTransportKwargs(TypedDict):
"""Typed dictionary for httpx.HTTPTransport initialization parameters.
These parameters configure the underlying HTTP transport behavior including
SSL verification, proxies, connection limits, and low-level socket options.
"""
verify: bool | str | SSLContext
cert: NotRequired[CertTypes | None]
trust_env: bool
http1: bool
http2: bool
limits: Limits
proxy: NotRequired[ProxyTypes | None]
uds: NotRequired[str | None]
local_address: NotRequired[str | None]
retries: int
socket_options: NotRequired[
Iterable[
tuple[int, int, int]
| tuple[int, int, bytes | bytearray]
| tuple[int, int, None, int]
]
| None
]
class HTTPTransport(_HTTPTransport):
"""HTTP transport that uses an interceptor for request/response modification.
This transport is used internally when a user provides a BaseInterceptor.
@@ -25,19 +62,19 @@ class HTTPTransport(httpx.HTTPTransport):
def __init__(
self,
interceptor: BaseInterceptor[httpx.Request, httpx.Response],
**kwargs: Any,
interceptor: BaseInterceptor[Request, Response],
**kwargs: Unpack[HTTPTransportKwargs],
) -> None:
"""Initialize transport with interceptor.
Args:
interceptor: HTTP interceptor for modifying raw request/response objects.
**kwargs: Additional arguments passed to httpx.HTTPTransport.
**kwargs: HTTPTransport configuration parameters (verify, cert, proxy, etc.).
"""
super().__init__(**kwargs)
self.interceptor = interceptor
def handle_request(self, request: httpx.Request) -> httpx.Response:
def handle_request(self, request: Request) -> Response:
"""Handle request with interception.
Args:
@@ -51,7 +88,7 @@ class HTTPTransport(httpx.HTTPTransport):
return self.interceptor.on_inbound(response)
class AsyncHTTPransport(httpx.AsyncHTTPTransport):
class AsyncHTTPTransport(_AsyncHTTPTransport):
"""Async HTTP transport that uses an interceptor for request/response modification.
This transport is used internally when a user provides a BaseInterceptor.
@@ -61,19 +98,19 @@ class AsyncHTTPransport(httpx.AsyncHTTPTransport):
def __init__(
self,
interceptor: BaseInterceptor[httpx.Request, httpx.Response],
**kwargs: Any,
interceptor: BaseInterceptor[Request, Response],
**kwargs: Unpack[HTTPTransportKwargs],
) -> None:
"""Initialize async transport with interceptor.
Args:
interceptor: HTTP interceptor for modifying raw request/response objects.
**kwargs: Additional arguments passed to httpx.AsyncHTTPTransport.
**kwargs: HTTPTransport configuration parameters (verify, cert, proxy, etc.).
"""
super().__init__(**kwargs)
self.interceptor = interceptor
async def handle_async_request(self, request: httpx.Request) -> httpx.Response:
async def handle_async_request(self, request: Request) -> Response:
"""Handle async request with interception.
Args:

View File

@@ -67,31 +67,44 @@ def _prepare_documents_for_chromadb(
ids: list[str] = []
texts: list[str] = []
metadatas: list[Mapping[str, str | int | float | bool]] = []
seen_ids: dict[str, int] = {}
try:
for doc in documents:
if "doc_id" in doc:
doc_id = str(doc["doc_id"])
else:
metadata = doc.get("metadata")
if metadata and isinstance(metadata, dict) and "doc_id" in metadata:
doc_id = str(metadata["doc_id"])
else:
content_for_hash = doc["content"]
if metadata:
metadata_str = json.dumps(metadata, sort_keys=True)
content_for_hash = f"{content_for_hash}|{metadata_str}"
doc_id = hashlib.sha256(content_for_hash.encode()).hexdigest()
for doc in documents:
if "doc_id" in doc:
ids.append(doc["doc_id"])
else:
content_for_hash = doc["content"]
metadata = doc.get("metadata")
if metadata:
metadata_str = json.dumps(metadata, sort_keys=True)
content_for_hash = f"{content_for_hash}|{metadata_str}"
content_hash = hashlib.blake2b(
content_for_hash.encode(), digest_size=32
).hexdigest()
ids.append(content_hash)
texts.append(doc["content"])
metadata = doc.get("metadata")
if metadata:
if isinstance(metadata, list):
metadatas.append(metadata[0] if metadata and metadata[0] else {})
if isinstance(metadata, list):
processed_metadata = metadata[0] if metadata and metadata[0] else {}
else:
processed_metadata = metadata
else:
metadatas.append(metadata)
else:
metadatas.append({})
processed_metadata = {}
if doc_id in seen_ids:
idx = seen_ids[doc_id]
texts[idx] = doc["content"]
metadatas[idx] = processed_metadata
else:
idx = len(ids)
ids.append(doc_id)
texts.append(doc["content"])
metadatas.append(processed_metadata)
seen_ids[doc_id] = idx
except Exception as e:
raise ValueError(f"Error preparing documents for ChromaDB: {e}") from e
return PreparedDocuments(ids, texts, metadatas)

View File

@@ -2117,15 +2117,14 @@ def test_agent_with_only_crewai_knowledge():
goal="Provide information based on knowledge sources",
backstory="You have access to specific knowledge sources.",
llm=LLM(
model="openrouter/openai/gpt-4o-mini",
api_key=os.getenv("OPENROUTER_API_KEY"),
model="gpt-4o-mini",
),
)
# Create a task that requires the agent to use the knowledge
task = Task(
description="What is Vidit's favorite color?",
expected_output="Vidit's favorclearite color.",
expected_output="Vidit's favorite color.",
agent=agent,
)

View File

@@ -1,35 +1,128 @@
interactions:
- request:
body: '{"messages": [{"role": "system", "content": "You are test role. test backstory\nYour
body: '{"trace_id": "bf042234-54a3-4fc0-857d-1ae5585a174e", "execution_type":
"crew", "user_identifier": null, "execution_context": {"crew_fingerprint": null,
"crew_name": "Unknown Crew", "flow_name": null, "crewai_version": "1.3.0", "privacy_level":
"standard"}, "execution_metadata": {"expected_duration_estimate": 300, "agent_count":
0, "task_count": 0, "flow_method_count": 0, "execution_started_at": "2025-11-06T16:05:14.776800+00:00"}}'
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate, zstd
Connection:
- keep-alive
Content-Length:
- '434'
Content-Type:
- application/json
User-Agent:
- CrewAI-CLI/1.3.0
X-Crewai-Version:
- 1.3.0
method: POST
uri: https://app.crewai.com/crewai_plus/api/v1/tracing/batches
response:
body:
string: '{"error":"bad_credentials","message":"Bad credentials"}'
headers:
Connection:
- keep-alive
Content-Length:
- '55'
Content-Type:
- application/json; charset=utf-8
Date:
- Thu, 06 Nov 2025 16:05:15 GMT
cache-control:
- no-store
content-security-policy:
- 'default-src ''self'' *.app.crewai.com app.crewai.com; script-src ''self''
''unsafe-inline'' *.app.crewai.com app.crewai.com https://cdn.jsdelivr.net/npm/apexcharts
https://www.gstatic.com https://run.pstmn.io https://apis.google.com https://apis.google.com/js/api.js
https://accounts.google.com https://accounts.google.com/gsi/client https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.1/normalize.min.css.map
https://*.google.com https://docs.google.com https://slides.google.com https://js.hs-scripts.com
https://js.sentry-cdn.com https://browser.sentry-cdn.com https://www.googletagmanager.com
https://js-na1.hs-scripts.com https://js.hubspot.com http://js-na1.hs-scripts.com
https://bat.bing.com https://cdn.amplitude.com https://cdn.segment.com https://d1d3n03t5zntha.cloudfront.net/
https://descriptusercontent.com https://edge.fullstory.com https://googleads.g.doubleclick.net
https://js.hs-analytics.net https://js.hs-banner.com https://js.hsadspixel.net
https://js.hscollectedforms.net https://js.usemessages.com https://snap.licdn.com
https://static.cloudflareinsights.com https://static.reo.dev https://www.google-analytics.com
https://share.descript.com/; style-src ''self'' ''unsafe-inline'' *.app.crewai.com
app.crewai.com https://cdn.jsdelivr.net/npm/apexcharts; img-src ''self'' data:
*.app.crewai.com app.crewai.com https://zeus.tools.crewai.com https://dashboard.tools.crewai.com
https://cdn.jsdelivr.net https://forms.hsforms.com https://track.hubspot.com
https://px.ads.linkedin.com https://px4.ads.linkedin.com https://www.google.com
https://www.google.com.br; font-src ''self'' data: *.app.crewai.com app.crewai.com;
connect-src ''self'' *.app.crewai.com app.crewai.com https://zeus.tools.crewai.com
https://connect.useparagon.com/ https://zeus.useparagon.com/* https://*.useparagon.com/*
https://run.pstmn.io https://connect.tools.crewai.com/ https://*.sentry.io
https://www.google-analytics.com https://edge.fullstory.com https://rs.fullstory.com
https://api.hubspot.com https://forms.hscollectedforms.net https://api.hubapi.com
https://px.ads.linkedin.com https://px4.ads.linkedin.com https://google.com/pagead/form-data/16713662509
https://google.com/ccm/form-data/16713662509 https://www.google.com/ccm/collect
https://worker-actionkit.tools.crewai.com https://api.reo.dev; frame-src ''self''
*.app.crewai.com app.crewai.com https://connect.useparagon.com/ https://zeus.tools.crewai.com
https://zeus.useparagon.com/* https://connect.tools.crewai.com/ https://docs.google.com
https://drive.google.com https://slides.google.com https://accounts.google.com
https://*.google.com https://app.hubspot.com/ https://td.doubleclick.net https://www.googletagmanager.com/
https://www.youtube.com https://share.descript.com'
expires:
- '0'
permissions-policy:
- camera=(), microphone=(self), geolocation=()
pragma:
- no-cache
referrer-policy:
- strict-origin-when-cross-origin
strict-transport-security:
- max-age=63072000; includeSubDomains
vary:
- Accept
x-content-type-options:
- nosniff
x-frame-options:
- SAMEORIGIN
x-permitted-cross-domain-policies:
- none
x-request-id:
- 9e528076-59a8-4c21-a999-2367937321ed
x-runtime:
- '0.070063'
x-xss-protection:
- 1; mode=block
status:
code: 401
message: Unauthorized
- request:
body: '{"messages":[{"role":"system","content":"You are test role. test backstory\nYour
personal goal is: test goal\nTo give my best complete final answer to the task
use the exact following format:\n\nThought: I now can give a great answer\nFinal
Answer: Your final answer must be the great and the most complete as possible,
it must be outcome described.\n\nI MUST use these formats, my job depends on
it!"}, {"role": "user", "content": "\nCurrent Task: Summarize the given context
in one sentence\n\nThis is the expect criteria for your final answer: A one-sentence
summary\nyou MUST return the actual complete content as the final answer, not
a summary.\n\nThis is the context you''re working with:\nThe quick brown fox
jumps over the lazy dog. This sentence contains every letter of the alphabet.\n\nBegin!
This is VERY important to you, use the tools available and give your best Final
Answer, your job depends on it!\n\nThought:"}], "model": "gpt-3.5-turbo"}'
respond using the exact following format:\n\nThought: I now can give a great
answer\nFinal Answer: Your final answer must be the great and the most complete
as possible, it must be outcome described.\n\nI MUST use these formats, my job
depends on it!"},{"role":"user","content":"\nCurrent Task: Summarize the given
context in one sentence\n\nThis is the expected criteria for your final answer:
A one-sentence summary\nyou MUST return the actual complete content as the final
answer, not a summary.\n\nThis is the context you''re working with:\nThe quick
brown fox jumps over the lazy dog. This sentence contains every letter of the
alphabet.\n\nBegin! This is VERY important to you, use the tools available and
give your best Final Answer, your job depends on it!\n\nThought:"}],"model":"gpt-3.5-turbo"}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
- gzip, deflate, zstd
connection:
- keep-alive
content-length:
- '961'
- '963'
content-type:
- application/json
cookie:
- __cf_bm=rb61BZH2ejzD5YPmLaEJqI7km71QqyNJGTVdNxBq6qk-1727213194-1.0.1.1-pJ49onmgX9IugEMuYQMralzD7oj_6W.CHbSu4Su1z3NyjTGYg.rhgJZWng8feFYah._oSnoYlkTjpK1Wd2C9FA;
_cfuvid=lbRdAddVWV6W3f5Dm9SaOPWDUOxqtZBSPr_fTW26nEA-1727213194587-0.0.1.1-604800000
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.47.0
- OpenAI/Python 1.109.1
x-stainless-arch:
- arm64
x-stainless-async:
@@ -39,30 +132,33 @@ interactions:
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.47.0
x-stainless-raw-response:
- 'true'
- 1.109.1
x-stainless-read-timeout:
- '600'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.11.7
- 3.12.9
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
content: "{\n \"id\": \"chatcmpl-AB7WTXzhDaFVbUrrQKXCo78KID8N9\",\n \"object\":
\"chat.completion\",\n \"created\": 1727213889,\n \"model\": \"gpt-3.5-turbo-0125\",\n
\ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\":
\"assistant\",\n \"content\": \"I now can give a great answer\\nFinal
Answer: The quick brown fox jumps over the lazy dog. This sentence contains
every letter of the alphabet.\",\n \"refusal\": null\n },\n \"logprobs\":
null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\":
190,\n \"completion_tokens\": 30,\n \"total_tokens\": 220,\n \"completion_tokens_details\":
{\n \"reasoning_tokens\": 0\n }\n },\n \"system_fingerprint\": null\n}\n"
body:
string: !!binary |
H4sIAAAAAAAAAwAAAP//jFPBbtswDL37Kwidk6BOmgbLbRgwYLdtCLAVaxHIEm2rkUVVopOmRf59
kJLG6dYBuxgwH9/z4yP9UgAIo8UShGolq87b8afbXTfbr74/89erb2o/axY0f7x1RkX+8VOMEoOq
B1T8ypoo6rxFNuSOsAooGZNqubiZXl/Py3KegY402kRrPI9nk/mY+1DR+Kqczk/MlozCKJbwqwAA
eMnP5NFpfBJLuBq9VjqMUTYolucmABHIpoqQMZrI0rEYDaAix+iy7S/QO40htWjgFoFl3EB62Rlr
wQdSiBqYoDFbzB0VRobaOGlBurjDMLlzd+5zLnzMhSWsWoTH3qgNVIF2Dmp6goe+8xFoiyHLWPm8
B03NBFatiRAxeVIIyZw0LgJuMezBIjMGoDqTpPWtrJAnl+MErPsoU5yut/YCkM4Ry7SOHOT9CTmc
o7PU+EBV/IMqauNMbNcBZSSXYopMXmT0UADc5xX1b1IXPlDnec20wfy58kN51BPDVQzo7OYEMrG0
Q306XYze0VtrZGlsvFiyUFK1qAfqcBGy14YugOJi6r/dvKd9nNy45n/kB0Ap9Ix67QNqo95OPLQF
TD/Nv9rOKWfDImLYGoVrNhjSJjTWsrfHcxZxHxm7dW1cg8EHk286bbI4FL8BAAD//wMAHFSnRdID
AAA=
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8c85eb7568111cf3-GRU
- 99a5d4d0bb8f7327-EWR
Connection:
- keep-alive
Content-Encoding:
@@ -70,37 +166,54 @@ interactions:
Content-Type:
- application/json
Date:
- Tue, 24 Sep 2024 21:38:09 GMT
- Thu, 06 Nov 2025 16:05:16 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=REDACTED;
path=/; expires=Thu, 06-Nov-25 16:35:16 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=REDACTED;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Strict-Transport-Security:
- max-age=31536000; includeSubDomains; preload
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- crewai-iuxna1
- user-REDACTED
openai-processing-ms:
- '662'
- '836'
openai-project:
- proj_REDACTED
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-envoy-upstream-service-time:
- '983'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- '10000'
x-ratelimit-limit-tokens:
- '50000000'
- '200000'
x-ratelimit-remaining-requests:
- '9999'
x-ratelimit-remaining-tokens:
- '49999772'
- '199785'
x-ratelimit-reset-requests:
- 6ms
- 8.64s
x-ratelimit-reset-tokens:
- 0s
- 64ms
x-request-id:
- req_833406276d399714b624a32627fc5b4a
http_version: HTTP/1.1
status_code: 200
- req_c302b31f8f804399ae05fc424215303a
status:
code: 200
message: OK
version: 1

View File

@@ -1,54 +1,165 @@
interactions:
- request:
body: '{"model": "openai/gpt-4o-mini", "messages": [{"role": "system", "content":
"Your goal is to rewrite the user query so that it is optimized for retrieval
from a vector database. Consider how the query will be used to find relevant
documents, and aim to make it more specific and context-aware. \n\n Do not include
any other text than the rewritten query, especially any preamble or postamble
and only add expected output format if its relevant to the rewritten query.
\n\n Focus on the key words of the intended task and to retrieve the most relevant
information. \n\n There will be some extra context provided that might need
to be removed such as expected_output formats structured_outputs and other instructions."},
{"role": "user", "content": "The original query is: What is Vidit''s favorite
color?\n\nThis is the expected criteria for your final answer: Vidit''s favorclearite
color.\nyou MUST return the actual complete content as the final answer, not
a summary.."}], "stream": false, "stop": ["\nObservation:"]}'
body: '{"trace_id": "9d9d9d14-e5bc-44bc-8cfc-3df9ba4e6055", "execution_type":
"crew", "user_identifier": null, "execution_context": {"crew_fingerprint": null,
"crew_name": "crew", "flow_name": null, "crewai_version": "1.3.0", "privacy_level":
"standard"}, "execution_metadata": {"expected_duration_estimate": 300, "agent_count":
0, "task_count": 0, "flow_method_count": 0, "execution_started_at": "2025-11-06T15:58:15.778396+00:00"},
"ephemeral_trace_id": "9d9d9d14-e5bc-44bc-8cfc-3df9ba4e6055"}'
headers:
Accept:
- '*/*'
Accept-Encoding:
- gzip, deflate, zstd
Connection:
- keep-alive
Content-Length:
- '488'
Content-Type:
- application/json
User-Agent:
- CrewAI-CLI/1.3.0
X-Crewai-Version:
- 1.3.0
method: POST
uri: https://app.crewai.com/crewai_plus/api/v1/tracing/ephemeral/batches
response:
body:
string: '{"id":"f303021e-f1a0-4fd8-9c7d-8ba6779f8ad3","ephemeral_trace_id":"9d9d9d14-e5bc-44bc-8cfc-3df9ba4e6055","execution_type":"crew","crew_name":"crew","flow_name":null,"status":"running","duration_ms":null,"crewai_version":"1.3.0","total_events":0,"execution_context":{"crew_fingerprint":null,"crew_name":"crew","flow_name":null,"crewai_version":"1.3.0","privacy_level":"standard"},"created_at":"2025-11-06T15:58:16.189Z","updated_at":"2025-11-06T15:58:16.189Z","access_code":"TRACE-c2990cd4d4","user_identifier":null}'
headers:
Connection:
- keep-alive
Content-Length:
- '515'
Content-Type:
- application/json; charset=utf-8
Date:
- Thu, 06 Nov 2025 15:58:16 GMT
cache-control:
- no-store
content-security-policy:
- 'default-src ''self'' *.app.crewai.com app.crewai.com; script-src ''self''
''unsafe-inline'' *.app.crewai.com app.crewai.com https://cdn.jsdelivr.net/npm/apexcharts
https://www.gstatic.com https://run.pstmn.io https://apis.google.com https://apis.google.com/js/api.js
https://accounts.google.com https://accounts.google.com/gsi/client https://cdnjs.cloudflare.com/ajax/libs/normalize/8.0.1/normalize.min.css.map
https://*.google.com https://docs.google.com https://slides.google.com https://js.hs-scripts.com
https://js.sentry-cdn.com https://browser.sentry-cdn.com https://www.googletagmanager.com
https://js-na1.hs-scripts.com https://js.hubspot.com http://js-na1.hs-scripts.com
https://bat.bing.com https://cdn.amplitude.com https://cdn.segment.com https://d1d3n03t5zntha.cloudfront.net/
https://descriptusercontent.com https://edge.fullstory.com https://googleads.g.doubleclick.net
https://js.hs-analytics.net https://js.hs-banner.com https://js.hsadspixel.net
https://js.hscollectedforms.net https://js.usemessages.com https://snap.licdn.com
https://static.cloudflareinsights.com https://static.reo.dev https://www.google-analytics.com
https://share.descript.com/; style-src ''self'' ''unsafe-inline'' *.app.crewai.com
app.crewai.com https://cdn.jsdelivr.net/npm/apexcharts; img-src ''self'' data:
*.app.crewai.com app.crewai.com https://zeus.tools.crewai.com https://dashboard.tools.crewai.com
https://cdn.jsdelivr.net https://forms.hsforms.com https://track.hubspot.com
https://px.ads.linkedin.com https://px4.ads.linkedin.com https://www.google.com
https://www.google.com.br; font-src ''self'' data: *.app.crewai.com app.crewai.com;
connect-src ''self'' *.app.crewai.com app.crewai.com https://zeus.tools.crewai.com
https://connect.useparagon.com/ https://zeus.useparagon.com/* https://*.useparagon.com/*
https://run.pstmn.io https://connect.tools.crewai.com/ https://*.sentry.io
https://www.google-analytics.com https://edge.fullstory.com https://rs.fullstory.com
https://api.hubspot.com https://forms.hscollectedforms.net https://api.hubapi.com
https://px.ads.linkedin.com https://px4.ads.linkedin.com https://google.com/pagead/form-data/16713662509
https://google.com/ccm/form-data/16713662509 https://www.google.com/ccm/collect
https://worker-actionkit.tools.crewai.com https://api.reo.dev; frame-src ''self''
*.app.crewai.com app.crewai.com https://connect.useparagon.com/ https://zeus.tools.crewai.com
https://zeus.useparagon.com/* https://connect.tools.crewai.com/ https://docs.google.com
https://drive.google.com https://slides.google.com https://accounts.google.com
https://*.google.com https://app.hubspot.com/ https://td.doubleclick.net https://www.googletagmanager.com/
https://www.youtube.com https://share.descript.com'
etag:
- W/"8df0b730688b8bc094b74c66a6293578"
expires:
- '0'
permissions-policy:
- camera=(), microphone=(self), geolocation=()
pragma:
- no-cache
referrer-policy:
- strict-origin-when-cross-origin
strict-transport-security:
- max-age=63072000; includeSubDomains
vary:
- Accept
x-content-type-options:
- nosniff
x-frame-options:
- SAMEORIGIN
x-permitted-cross-domain-policies:
- none
x-request-id:
- 38352441-7508-4e1e-9bff-77d1689dffdf
x-runtime:
- '0.085540'
x-xss-protection:
- 1; mode=block
status:
code: 201
message: Created
- request:
body: '{"messages":[{"role":"system","content":"Your goal is to rewrite the user
query so that it is optimized for retrieval from a vector database. Consider
how the query will be used to find relevant documents, and aim to make it more
specific and context-aware. \n\n Do not include any other text than the rewritten
query, especially any preamble or postamble and only add expected output format
if its relevant to the rewritten query. \n\n Focus on the key words of the intended
task and to retrieve the most relevant information. \n\n There will be some
extra context provided that might need to be removed such as expected_output
formats structured_outputs and other instructions."},{"role":"user","content":"The
original query is: What is Vidit''s favorite color?\n\nThis is the expected
criteria for your final answer: Vidit''s favorite color.\nyou MUST return the
actual complete content as the final answer, not a summary.."}],"model":"gpt-4o-mini"}'
headers:
accept:
- '*/*'
- application/json
accept-encoding:
- gzip, deflate
- gzip, deflate, zstd
connection:
- keep-alive
content-length:
- '1017'
- '950'
content-type:
- application/json
host:
- openrouter.ai
http-referer:
- https://litellm.ai
- api.openai.com
user-agent:
- litellm/1.68.0
x-title:
- liteLLM
- OpenAI/Python 1.109.1
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.109.1
x-stainless-read-timeout:
- '600'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.9
method: POST
uri: https://openrouter.ai/api/v1/chat/completions
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAAwAAAP//4lKAAS4AAAAA//90kE1PIzEMhv8Kei97Sdnplwq5gTgAF8ShcFitRmnG
nTFk4ihxq11V899Xs6gFJLja78djH8ANLFqKk+lqsZpP56vpYqJhublfP1eP65v1i79Lt9fdMwxS
lj03lGHxkChe3cGgl4YCLCRRdPyzTTpZyKTnyDCQzQt5hYXvnJ576VMgZYkw8JmcUgP7XmvgO2FP
BfbXAUHalGVTYOMuBIMtRy5dnckVibAoKgkG0Snvqf5my7GhP7CVQU+luJZgD8gSCBauFC7qoo40
EpXiSPrEDeuPcrZ1e8msdOYlSIZBpu2uuHDEeWvi2L4NhuG3QflblPqRpaWcMv8P3Ka6ml/OLmaz
6rKCwe6IkbL0SWuVV4pl/MNy5Di+6DRfGqioC+/Ci8p8NtcNqeNQxlTvfEfNSVwNX4R+1J/u+GAZ
hn8AAAD//wMAIwJ79CICAAA=
H4sIAAAAAAAAAwAAAP//jFJBbtswELzrFQQvvViBLMuy42sObYEWKIoiQFMEAkOu5G0oLkGu0xaB
/15QciwlTYFceODsDGeG+5gJIdHInZB6r1j33uZX33+1H78y9tvVt+Lmpv68KrfXX96Xnz7cu61c
JAbd/QTNT6wLTb23wEhuhHUAxZBUl5u6rKqqvqwHoCcDNtE6z3lFeY8O87Ioq7zY5MuTuN4Taohy
J35kQgjxOJzJpzPwW+5EsXi66SFG1YHcnYeEkIFsupEqRoysHMvFBGpyDG6wfo0G+V0UrXqggAxC
k6VwMZ8O0B6iSo7dwdoZoJwjVinx4PP2hBzPzix1PtBdfEGVLTqM+yaAiuSSi8jk5YAeMyFuhwYO
z0JJH6j33DDdw/DccrMa9eRU/ISuTxgTKzsnbRevyDUGWKGNswqlVnoPZqJOfauDQZoB2Sz0v2Ze
0x6Do+veIj8BWoNnMI0PYFA/DzyNBUhr+b+xc8mDYRkhPKCGhhFC+ggDrTrYcVlk/BMZ+qZF10Hw
AceNaX2zrgvV1rBeX8rsmP0FAAD//wMA5SIzeT8DAAA=
headers:
Access-Control-Allow-Origin:
- '*'
CF-RAY:
- 9402c9db99ec4722-BOM
- 99a5ca96bb1443e7-EWR
Connection:
- keep-alive
Content-Encoding:
@@ -56,73 +167,122 @@ interactions:
Content-Type:
- application/json
Date:
- Thu, 15 May 2025 12:55:14 GMT
- Thu, 06 Nov 2025 15:58:16 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=REDACTED;
path=/; expires=Thu, 06-Nov-25 16:28:16 GMT; domain=.api.openai.com; HttpOnly;
Secure; SameSite=None
- _cfuvid=REDACTED;
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
Strict-Transport-Security:
- max-age=31536000; includeSubDomains; preload
Transfer-Encoding:
- chunked
Vary:
- Accept-Encoding
x-clerk-auth-message:
- Invalid JWT form. A JWT consists of three parts separated by dots. (reason=token-invalid,
token-carrier=header)
x-clerk-auth-reason:
- token-invalid
x-clerk-auth-status:
- signed-out
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- user-REDACTED
openai-processing-ms:
- '235'
openai-project:
- proj_REDACTED
openai-version:
- '2020-10-01'
x-envoy-upstream-service-time:
- '420'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- '10000'
x-ratelimit-limit-tokens:
- '200000'
x-ratelimit-remaining-requests:
- '9999'
x-ratelimit-remaining-tokens:
- '199785'
x-ratelimit-reset-requests:
- 8.64s
x-ratelimit-reset-tokens:
- 64ms
x-request-id:
- req_9810e9721aa9463c930414ab5174ab61
status:
code: 200
message: OK
- request:
body: '{"model": "openai/gpt-4o-mini", "messages": [{"role": "system", "content":
"You are Information Agent. You have access to specific knowledge sources.\nYour
personal goal is: Provide information based on knowledge sources\nTo give my
best complete final answer to the task respond using the exact following format:\n\nThought:
I now can give a great answer\nFinal Answer: Your final answer must be the great
and the most complete as possible, it must be outcome described.\n\nI MUST use
these formats, my job depends on it!"}, {"role": "user", "content": "\nCurrent
Task: What is Vidit''s favorite color?\n\nThis is the expected criteria for
your final answer: Vidit''s favorclearite color.\nyou MUST return the actual
complete content as the final answer, not a summary.\n\nBegin! This is VERY
important to you, use the tools available and give your best Final Answer, your
job depends on it!\n\nThought:"}], "stream": false, "stop": ["\nObservation:"]}'
body: '{"messages":[{"role":"system","content":"You are Information Agent. You
have access to specific knowledge sources.\nYour personal goal is: Provide information
based on knowledge sources\nTo give my best complete final answer to the task
respond using the exact following format:\n\nThought: I now can give a great
answer\nFinal Answer: Your final answer must be the great and the most complete
as possible, it must be outcome described.\n\nI MUST use these formats, my job
depends on it!"},{"role":"user","content":"\nCurrent Task: What is Vidit''s
favorite color?\n\nThis is the expected criteria for your final answer: Vidit''s
favorite color.\nyou MUST return the actual complete content as the final answer,
not a summary.\n\nBegin! This is VERY important to you, use the tools available
and give your best Final Answer, your job depends on it!\n\nThought:"}],"model":"gpt-4o-mini"}'
headers:
accept:
- '*/*'
- application/json
accept-encoding:
- gzip, deflate
- gzip, deflate, zstd
connection:
- keep-alive
content-length:
- '951'
- '884'
content-type:
- application/json
cookie:
- __cf_bm=REDACTED;
_cfuvid=REDACTED
host:
- openrouter.ai
http-referer:
- https://litellm.ai
- api.openai.com
user-agent:
- litellm/1.68.0
x-title:
- liteLLM
- OpenAI/Python 1.109.1
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.109.1
x-stainless-read-timeout:
- '600'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.9
method: POST
uri: https://openrouter.ai/api/v1/chat/completions
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: !!binary |
H4sIAAAAAAAAAwAAAP//4lKAAS4AAAAA///iQjABAAAA//90kN1qGzEQRl9FfNdyul4nday73ARy
VUpLE2jLIu+O15NoZ4QkOy1moa/R1+uTlE1wnEB7qU/zc84cwB0cepLZfHm+XMwXy/nF7II/3d7V
H+tOPvsS3le3d+keFjHpnjtKcPgQSa5uYDFoRwEOGkk8v+tjmZ3rbGBhWOj6ntoCh3bry1mrQwxU
WAUWbSJfqIM7rbVot8otZbivBwTtY9J1hpNdCBYbFs7bJpHPKnDIRSMsxBfeU/OfX5aOfsBVFgPl
7HuCOyBpIDj4nDkXL2WiUSkkE+mNEX00rRfT856MN/0EarzkR0rGfJNrFh/M1dPbmS/ccfnz63c2
G7/XxIVMq0GT4WzWYUdnsEi02WUfjiLPjCz9czCO3y3yz1xomCx6SjHxE8omNtViVV/WdbWqYLE7
CsSkQyxN0QeSPF2wmgyOxz3lK4uixYdTcrmyb7ubjornkKexrW+31L0UV+M/pr6ufxF51TKOfwEA
AP//AwBybekMaAIAAA==
H4sIAAAAAAAAAwAAAP//jFPBahsxEL37KwZderGN7dqO41vaUghtT4FCacIiS7PrSbQaVZq1swT/
e9HayTptCr0ING/e6M2b0dMAQJFVa1Bmq8XUwY0+/tiXXy4/2BDN/p7izdY4/vrp29Wvmxbv1TAz
eHOPRp5ZY8N1cCjE/gibiFowV51eLGfz+Xx5ueqAmi26TKuCjOY8qsnTaDaZzUeTi9F0dWJvmQwm
tYafAwCAp+7MOr3FR7WGyfA5UmNKukK1fkkCUJFdjiidEiXRXtSwBw17Qd9JvwbPezDaQ0U7BA1V
lg3apz1GgFv/mbx2cNXd1/CdLMm7BKXecSRBMOw4AiXwLBCajSPjWrBsmhq9oAWOsCeLroUHz3s/
husSWm5gq3cIKaChkgx0ih4lZ1sUTS6B3nAjxweHcA21bmGDoDcOQRhC5B3ZLLjmiJApHNFCxBTY
Jxyf9xuxbJLOnvvGuTNAe8+i88w6p+9OyOHFW8dViLxJf1BVSZ7StoioE/vsYxIOqkMPA4C7bobN
q7GoELkOUgg/YPfcdLk61lP96vTofHEChUW7Pj6bvh++Ua842Xa2Bcpos0XbU/uV0Y0lPgMGZ13/
reat2sfOyVf/U74HjMEgaIsQ0ZJ53XGfFjH/rH+lvbjcCVYJ444MFkIY8yQslrpxx31XqU2CdVGS
rzCGSMelL0OxWE50ucTF4lINDoPfAAAA//8DAPFGfbMCBAAA
headers:
Access-Control-Allow-Origin:
- '*'
CF-RAY:
- 9402c9e1b94a4722-BOM
- 99a5ca9c5ef543e7-EWR
Connection:
- keep-alive
Content-Encoding:
@@ -130,20 +290,47 @@ interactions:
Content-Type:
- application/json
Date:
- Thu, 15 May 2025 12:55:15 GMT
- Thu, 06 Nov 2025 15:58:19 GMT
Server:
- cloudflare
Strict-Transport-Security:
- max-age=31536000; includeSubDomains; preload
Transfer-Encoding:
- chunked
Vary:
- Accept-Encoding
x-clerk-auth-message:
- Invalid JWT form. A JWT consists of three parts separated by dots. (reason=token-invalid,
token-carrier=header)
x-clerk-auth-reason:
- token-invalid
x-clerk-auth-status:
- signed-out
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
openai-organization:
- user-REDACTED
openai-processing-ms:
- '1326'
openai-project:
- proj_REDACTED
openai-version:
- '2020-10-01'
x-envoy-upstream-service-time:
- '1754'
x-openai-proxy-wasm:
- v0.1
x-ratelimit-limit-requests:
- '10000'
x-ratelimit-limit-tokens:
- '200000'
x-ratelimit-remaining-requests:
- '9998'
x-ratelimit-remaining-tokens:
- '199803'
x-ratelimit-reset-requests:
- 15.913s
x-ratelimit-reset-tokens:
- 59ms
x-request-id:
- req_f975e16b666e498b8bcfdfab525f71b3
status:
code: 200
message: OK

View File

@@ -601,3 +601,81 @@ def test_file_path_validation():
match="file_path/file_paths must be a Path, str, or a list of these types",
):
PDFKnowledgeSource()
def test_hash_based_id_generation_without_doc_id(mock_vector_db):
"""Test that documents without doc_id generate hash-based IDs. Duplicates are deduplicated before upsert."""
import hashlib
import json
from crewai.rag.chromadb.utils import _prepare_documents_for_chromadb
from crewai.rag.types import BaseRecord
documents: list[BaseRecord] = [
{"content": "First document content", "metadata": {"source": "test1", "category": "research"}},
{"content": "Second document content", "metadata": {"source": "test2", "category": "research"}},
{"content": "Third document content"}, # No metadata
]
result = _prepare_documents_for_chromadb(documents)
assert len(result.ids) == 3
# Unique documents should get 64-character hex hashes (no suffix)
for doc_id in result.ids:
assert len(doc_id) == 64, f"ID should be 64 characters: {doc_id}"
assert all(c in "0123456789abcdef" for c in doc_id), f"ID should be hex: {doc_id}"
# Different documents should have different hashes
assert result.ids[0] != result.ids[1] != result.ids[2]
# Verify hashes match expected values
expected_hash_1 = hashlib.sha256(
f"First document content|{json.dumps({'category': 'research', 'source': 'test1'}, sort_keys=True)}".encode()
).hexdigest()
assert result.ids[0] == expected_hash_1, "First document hash should match expected"
expected_hash_3 = hashlib.sha256("Third document content".encode()).hexdigest()
assert result.ids[2] == expected_hash_3, "Third document hash should match expected"
# Test that duplicate documents are deduplicated (same ID, only one sent)
duplicate_documents: list[BaseRecord] = [
{"content": "Same content", "metadata": {"source": "test"}},
{"content": "Same content", "metadata": {"source": "test"}},
{"content": "Same content", "metadata": {"source": "test"}},
]
duplicate_result = _prepare_documents_for_chromadb(duplicate_documents)
# Duplicates should be deduplicated - only one ID should remain
assert len(duplicate_result.ids) == 1, "Duplicate documents should be deduplicated"
assert len(duplicate_result.ids[0]) == 64, "Deduplicated ID should be clean hash"
# Verify it's the expected hash
expected_hash = hashlib.sha256(
f"Same content|{json.dumps({'source': 'test'}, sort_keys=True)}".encode()
).hexdigest()
assert duplicate_result.ids[0] == expected_hash, "Deduplicated ID should match expected hash"
def test_hash_based_id_generation_with_doc_id_in_metadata(mock_vector_db):
"""Test that documents with doc_id in metadata use the doc_id directly, not hash-based."""
from crewai.rag.chromadb.utils import _prepare_documents_for_chromadb
from crewai.rag.types import BaseRecord
documents_with_doc_id: list[BaseRecord] = [
{"content": "First document", "metadata": {"doc_id": "custom-id-1", "source": "test1"}},
{"content": "Second document", "metadata": {"doc_id": "custom-id-2"}},
]
documents_without_doc_id: list[BaseRecord] = [
{"content": "First document", "metadata": {"source": "test1"}},
{"content": "Second document"},
]
result_with_doc_id = _prepare_documents_for_chromadb(documents_with_doc_id)
result_without_doc_id = _prepare_documents_for_chromadb(documents_without_doc_id)
assert result_with_doc_id.ids == ["custom-id-1", "custom-id-2"]
assert len(result_without_doc_id.ids) == 2
# Unique documents get 64-character hashes
for doc_id in result_without_doc_id.ids:
assert len(doc_id) == 64, "ID should be 64 characters"
assert all(c in "0123456789abcdef" for c in doc_id), "ID should be hex"

View File

@@ -6,7 +6,7 @@ import httpx
import pytest
from crewai.llms.hooks.base import BaseInterceptor
from crewai.llms.hooks.transport import AsyncHTTPransport, HTTPTransport
from crewai.llms.hooks.transport import AsyncHTTPTransport, HTTPTransport
class TrackingInterceptor(BaseInterceptor[httpx.Request, httpx.Response]):
@@ -128,7 +128,7 @@ class TestAsyncHTTPTransport:
def test_async_transport_instantiation(self) -> None:
"""Test that async transport can be instantiated with interceptor."""
interceptor = TrackingInterceptor()
transport = AsyncHTTPransport(interceptor=interceptor)
transport = AsyncHTTPTransport(interceptor=interceptor)
assert transport.interceptor is interceptor
@@ -136,13 +136,13 @@ class TestAsyncHTTPTransport:
"""Test that async transport requires interceptor parameter."""
# AsyncHTTPransport requires an interceptor parameter
with pytest.raises(TypeError):
AsyncHTTPransport()
AsyncHTTPTransport()
@pytest.mark.asyncio
async def test_async_interceptor_called_on_request(self) -> None:
"""Test that async interceptor hooks are called during request handling."""
interceptor = TrackingInterceptor()
transport = AsyncHTTPransport(interceptor=interceptor)
transport = AsyncHTTPTransport(interceptor=interceptor)
# Create a mock parent transport that returns a response
mock_response = httpx.Response(200, json={"success": True})
@@ -217,7 +217,7 @@ class TestTransportIntegration:
async def test_multiple_async_requests_same_interceptor(self) -> None:
"""Test that multiple async requests through same interceptor are tracked."""
interceptor = TrackingInterceptor()
transport = AsyncHTTPransport(interceptor=interceptor)
transport = AsyncHTTPTransport(interceptor=interceptor)
mock_response = httpx.Response(200)