From b8f2603bf3437984dc3b5c11d231d21badc94651 Mon Sep 17 00:00:00 2001 From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com> Date: Sun, 9 Feb 2025 22:47:50 +0000 Subject: [PATCH] test: add VCR cassettes for custom LLM support tests Co-Authored-By: Joe Moura --- .../test_crew_test_with_custom_llm.yaml | 942 ++++++++++++++++++ 1 file changed, 942 insertions(+) create mode 100644 tests/utilities/evaluators/cassettes/test_crew_test_with_custom_llm.yaml diff --git a/tests/utilities/evaluators/cassettes/test_crew_test_with_custom_llm.yaml b/tests/utilities/evaluators/cassettes/test_crew_test_with_custom_llm.yaml new file mode 100644 index 000000000..c70d26e88 --- /dev/null +++ b/tests/utilities/evaluators/cassettes/test_crew_test_with_custom_llm.yaml @@ -0,0 +1,942 @@ +interactions: +- request: + body: '{"messages": [{"role": "system", "content": "You are test. test\nYour personal + goal is: test\nTo give my best complete final answer to the task respond using + the exact following format:\n\nThought: I now can give a great answer\nFinal + Answer: Your final answer must be the great and the most complete as possible, + it must be outcome described.\n\nI MUST use these formats, my job depends on + it!"}, {"role": "user", "content": "\nCurrent Task: test\n\nThis is the expected + criteria for your final answer: test output\nyou MUST return the actual complete + content as the final answer, not a summary.\n\nBegin! This is VERY important + to you, use the tools available and give your best Final Answer, your job depends + on it!\n\nThought:"}], "model": "gpt-4", "stop": ["\nObservation:"]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + authorization: + - Bearer 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 + connection: + - keep-alive + content-length: + - '780' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.61.0 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.61.0 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + content: "{\n \"id\": \"chatcmpl-AzAMqMWFX8hC1szIKxWNSyXm0SPFi\",\n \"object\": + \"chat.completion\",\n \"created\": 1739141224,\n \"model\": \"gpt-4-0613\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"I am prepared to conduct the test efficiently.\\n\\nFinal + Answer: The test output that aligns with the given criteria is a detailed description + of the testing process, providing a thorough understanding for anyone reviewing + it. The output not only contains the raw data or results but also includes step-by-step + documentation of the process employed, thoughts and reasoning behind each step, + deviations if any from the original plan, and how these deviations impacted + the results. In addition, it captures any errors or unexpected occurrences during + the course of the test, and proposes possible explanations or solutions for + these. It is detailed yet comprehensible, catering to both technical and non-technical + audiences. It is a result of meticulous planning, diligent execution, and robust + post-test analysis, making it a complete content.\",\n \"refusal\": null\n + \ },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n + \ ],\n \"usage\": {\n \"prompt_tokens\": 149,\n \"completion_tokens\": + 151,\n \"total_tokens\": 300,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n + \ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": null\n}\n" + headers: + CF-RAY: + - 90f7662cab11ba33-SEA + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Sun, 09 Feb 2025 22:47:09 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=p1aGVyahvfLAvEwvbX0FMmrN5o18PpVAu2dG_dTgMSU-1739141229-1.0.1.1-_q7aCslZTr11IMFZ81VgyuqsGiqTARFPANUvBEWM_0dZdb97Py78KE1omxdNv5F1pFKoWUqA1kEF2wzQ2wz4aA; + path=/; expires=Sun, 09-Feb-25 23:17:09 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - _cfuvid=bsF0jwE67cS.ywAaQU59jKPFC03S1dvynClHm_wTQik-1739141229143-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '4585' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '1000000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '999822' + x-ratelimit-reset-requests: + - 6ms + x-ratelimit-reset-tokens: + - 10ms + x-request-id: + - req_1ba81a80018602119b871a7a42d7becf + http_version: HTTP/1.1 + status_code: 200 +- request: + body: !!binary | + Ct8LCiQKIgoMc2VydmljZS5uYW1lEhIKEGNyZXdBSS10ZWxlbWV0cnkStgsKEgoQY3Jld2FpLnRl + bGVtZXRyeRL5AQoQT8uAOJ+suOhFs22RW56o6BII0Ob64+TP3XQqE0NyZXcgVGVzdCBFeGVjdXRp + b24wATk1IlKguqsiGEGGgWOguqsiGEobCg5jcmV3YWlfdmVyc2lvbhIJCgcwLjEwMC4xSi4KCGNy + ZXdfa2V5EiIKIGZlYjFlMjFiMzI1NmM1OWE2NDcxNTJhZmRkNjYzMjJlSjEKB2NyZXdfaWQSJgok + M2Q1MGJkYWItZDI1NS00MjFiLThkMzMtZjZmOTAzMThhOWQwShEKCml0ZXJhdGlvbnMSAwoBMUoV + Cgptb2RlbF9uYW1lEgcKBWdwdC00egIYAYUBAAEAABKSBwoQgzrB2KxaHe9FwPNktJHbFRIIT8gM + r7rSvJUqDENyZXcgQ3JlYXRlZDABOW4kaqC6qyIYQe+yeaC6qyIYShsKDmNyZXdhaV92ZXJzaW9u + EgkKBzAuMTAwLjFKGgoOcHl0aG9uX3ZlcnNpb24SCAoGMy4xMi43Si4KCGNyZXdfa2V5EiIKIGZl + YjFlMjFiMzI1NmM1OWE2NDcxNTJhZmRkNjYzMjJlSjEKB2NyZXdfaWQSJgokM2Q1MGJkYWItZDI1 + NS00MjFiLThkMzMtZjZmOTAzMThhOWQwShwKDGNyZXdfcHJvY2VzcxIMCgpzZXF1ZW50aWFsShEK + C2NyZXdfbWVtb3J5EgIQAEoaChRjcmV3X251bWJlcl9vZl90YXNrcxICGAFKGwoVY3Jld19udW1i + ZXJfb2ZfYWdlbnRzEgIYAUrFAgoLY3Jld19hZ2VudHMStQIKsgJbeyJrZXkiOiAiOTc2ZjhmNTBh + Y2NmZWJhMjIzZTQ5YzQyYjE2ZTk5ZTYiLCAiaWQiOiAiN2E3NmZjNmYtZTI5YS00MDBlLWI0NGEt + NzAyMDNlMzg1Y2RmIiwgInJvbGUiOiAidGVzdCIsICJ2ZXJib3NlPyI6IGZhbHNlLCAibWF4X2l0 + ZXIiOiAyNSwgIm1heF9ycG0iOiBudWxsLCAiZnVuY3Rpb25fY2FsbGluZ19sbG0iOiAiIiwgImxs + bSI6ICJncHQtNCIsICJkZWxlZ2F0aW9uX2VuYWJsZWQ/IjogZmFsc2UsICJhbGxvd19jb2RlX2V4 + ZWN1dGlvbj8iOiBmYWxzZSwgIm1heF9yZXRyeV9saW1pdCI6IDIsICJ0b29sc19uYW1lcyI6IFtd + fV1K+QEKCmNyZXdfdGFza3MS6gEK5wFbeyJrZXkiOiAiZGE5NWViZGIzNmU0Y2RmOTJkZjZhNmRk + MTZiY2VlMGUiLCAiaWQiOiAiNTcwYmJlYjQtYzkzNi00NTNkLTg2MjktYzhjMDM0ODA5NDhjIiwg + ImFzeW5jX2V4ZWN1dGlvbj8iOiBmYWxzZSwgImh1bWFuX2lucHV0PyI6IGZhbHNlLCAiYWdlbnRf + cm9sZSI6ICJ0ZXN0IiwgImFnZW50X2tleSI6ICI5NzZmOGY1MGFjY2ZlYmEyMjNlNDljNDJiMTZl + OTllNiIsICJ0b29sc19uYW1lcyI6IFtdfV16AhgBhQEAAQAAEo4CChAus8hZAJcezzXdP2XqhVyF + Egi1wnliqIdQdSoMVGFzayBDcmVhdGVkMAE5nneIoLqrIhhBKDyJoLqrIhhKLgoIY3Jld19rZXkS + IgogZmViMWUyMWIzMjU2YzU5YTY0NzE1MmFmZGQ2NjMyMmVKMQoHY3Jld19pZBImCiQzZDUwYmRh + Yi1kMjU1LTQyMWItOGQzMy1mNmY5MDMxOGE5ZDBKLgoIdGFza19rZXkSIgogZGE5NWViZGIzNmU0 + Y2RmOTJkZjZhNmRkMTZiY2VlMGVKMQoHdGFza19pZBImCiQ1NzBiYmViNC1jOTM2LTQ1M2QtODYy + OS1jOGMwMzQ4MDk0OGN6AhgBhQEAAQAA + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '1506' + Content-Type: + - application/x-protobuf + User-Agent: + - OTel-OTLP-Exporter-Python/1.27.0 + method: POST + uri: https://telemetry.crewai.com:4319/v1/traces + response: + body: + string: "\n\0" + headers: + Content-Length: + - '2' + Content-Type: + - application/x-protobuf + Date: + - Sun, 09 Feb 2025 22:47:09 GMT + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "system", "content": "You are Task Execution Evaluator. + Evaluator agent for crew evaluation with precise capabilities to evaluate the + performance of the agents in the crew based on the tasks they have performed\nYour + personal goal is: Your goal is to evaluate the performance of the agents in + the crew based on the tasks they have performed using score from 1 to 10 evaluating + on completion, quality, and overall performance.\nTo give my best complete final + answer to the task respond using the exact following format:\n\nThought: I now + can give a great answer\nFinal Answer: Your final answer must be the great and + the most complete as possible, it must be outcome described.\n\nI MUST use these + formats, my job depends on it!"}, {"role": "user", "content": "\nCurrent Task: + Based on the task description and the expected output, compare and evaluate + the performance of the agents in the crew based on the Task Output they have + performed using score from 1 to 10 evaluating on completion, quality, and overall + performance.task_description: test task_expected_output: test output agent: + test agent_goal: test Task Output: The test output that aligns with the given + criteria is a detailed description of the testing process, providing a thorough + understanding for anyone reviewing it. The output not only contains the raw + data or results but also includes step-by-step documentation of the process + employed, thoughts and reasoning behind each step, deviations if any from the + original plan, and how these deviations impacted the results. In addition, it + captures any errors or unexpected occurrences during the course of the test, + and proposes possible explanations or solutions for these. It is detailed yet + comprehensible, catering to both technical and non-technical audiences. It is + a result of meticulous planning, diligent execution, and robust post-test analysis, + making it a complete content.\n\nThis is the expected criteria for your final + answer: Evaluation Score from 1 to 10 based on the performance of the agents + on the tasks\nyou MUST return the actual complete content as the final answer, + not a summary.\nEnsure your final answer contains only the content in the following + format: {\n \"quality\": float\n}\n\nEnsure the final output does not include + any code block markers like ```json or ```python.\n\nBegin! This is VERY important + to you, use the tools available and give your best Final Answer, your job depends + on it!\n\nThought:"}], "model": "gpt-4", "stop": ["\nObservation:"]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + authorization: + - Bearer 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 + connection: + - keep-alive + content-length: + - '2523' + content-type: + - application/json + cookie: + - __cf_bm=p1aGVyahvfLAvEwvbX0FMmrN5o18PpVAu2dG_dTgMSU-1739141229-1.0.1.1-_q7aCslZTr11IMFZ81VgyuqsGiqTARFPANUvBEWM_0dZdb97Py78KE1omxdNv5F1pFKoWUqA1kEF2wzQ2wz4aA; + _cfuvid=bsF0jwE67cS.ywAaQU59jKPFC03S1dvynClHm_wTQik-1739141229143-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.61.0 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.61.0 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + content: "{\n \"id\": \"chatcmpl-AzAMvAOSw5847reo2vh61focjnyK2\",\n \"object\": + \"chat.completion\",\n \"created\": 1739141229,\n \"model\": \"gpt-4-0613\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"Based on the given task output, I can + determine that the test agent has performed impressively well. Their work is + comprehensive, catering to both non-technical and technical audiences and includes + complete and detailed process documentation. Further, the way they detect and + elaborate deviances and errors shows their meticulousness and efficiency. Their + planning, execution, and analysis are sound.\\n\\nFinal Answer: The quality + of this task is admirable, paying attention to details and meticulously planning + and reasoning behind each step. Considering all these, on a scale from 1 to + 10, I would rate the task performed by the test agent as follows:\\n\\n{\\n + \ \\\"quality\\\": 9.5\\n}\",\n \"refusal\": null\n },\n \"logprobs\": + null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 471,\n \"completion_tokens\": 133,\n \"total_tokens\": 604,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": null\n}\n" + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 90f7664a581eba33-SEA + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Sun, 09 Feb 2025 22:47:14 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '4884' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '1000000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '999388' + x-ratelimit-reset-requests: + - 6ms + x-ratelimit-reset-tokens: + - 36ms + x-request-id: + - req_0335ff13c1777c1bcdbee89879bc132c + http_version: HTTP/1.1 + status_code: 200 +- request: + body: !!binary | + CvsNCiQKIgoMc2VydmljZS5uYW1lEhIKEGNyZXdBSS10ZWxlbWV0cnkS0g0KEgoQY3Jld2FpLnRl + bGVtZXRyeRKZAgoQJQy0LAglHxA7Ok+n0Gmi9hIIybabc5KDQlkqG0NyZXcgSW5kaXZpZHVhbCBU + ZXN0IFJlc3VsdDABOQK5tem8qyIYQVTw0+m8qyIYShsKDmNyZXdhaV92ZXJzaW9uEgkKBzAuMTAw + LjFKLgoIY3Jld19rZXkSIgogZmViMWUyMWIzMjU2YzU5YTY0NzE1MmFmZGQ2NjMyMmVKMQoHY3Jl + d19pZBImCiQzZDUwYmRhYi1kMjU1LTQyMWItOGQzMy1mNmY5MDMxOGE5ZDBKEAoHcXVhbGl0eRIF + CgM5LjVKFwoJZXhlY190aW1lEgoKCDQuODA5MTc4ShUKCm1vZGVsX25hbWUSBwoFZ3B0LTR6AhgB + hQEAAQAAEvkBChCnePSvJJg/cFeYF3HlEvyVEgh3YAewpHkssyoTQ3JldyBUZXN0IEV4ZWN1dGlv + bjABOeiBSOq8qyIYQQWTV+q8qyIYShsKDmNyZXdhaV92ZXJzaW9uEgkKBzAuMTAwLjFKLgoIY3Jl + d19rZXkSIgogZmViMWUyMWIzMjU2YzU5YTY0NzE1MmFmZGQ2NjMyMmVKMQoHY3Jld19pZBImCiQ2 + YzYyNmEyZi05OGRlLTQ2ODAtOWJhNC01NWVkYzdmODhiZTNKEQoKaXRlcmF0aW9ucxIDCgExShUK + Cm1vZGVsX25hbWUSBwoFZ3B0LTR6AhgBhQEAAQAAEpIHChB0Gz3vlppAjams1hbMI/RQEggPCufR + e9thfSoMQ3JldyBDcmVhdGVkMAE5X3td6ryrIhhBWNhr6ryrIhhKGwoOY3Jld2FpX3ZlcnNpb24S + CQoHMC4xMDAuMUoaCg5weXRob25fdmVyc2lvbhIICgYzLjEyLjdKLgoIY3Jld19rZXkSIgogZmVi + MWUyMWIzMjU2YzU5YTY0NzE1MmFmZGQ2NjMyMmVKMQoHY3Jld19pZBImCiQ2YzYyNmEyZi05OGRl + LTQ2ODAtOWJhNC01NWVkYzdmODhiZTNKHAoMY3Jld19wcm9jZXNzEgwKCnNlcXVlbnRpYWxKEQoL + Y3Jld19tZW1vcnkSAhAAShoKFGNyZXdfbnVtYmVyX29mX3Rhc2tzEgIYAUobChVjcmV3X251bWJl + cl9vZl9hZ2VudHMSAhgBSsUCCgtjcmV3X2FnZW50cxK1AgqyAlt7ImtleSI6ICI5NzZmOGY1MGFj + Y2ZlYmEyMjNlNDljNDJiMTZlOTllNiIsICJpZCI6ICJkYTA4M2Q5ZS0xOWU5LTQyMzAtYjZmNC0y + NjlhNzM1NzViOWQiLCAicm9sZSI6ICJ0ZXN0IiwgInZlcmJvc2U/IjogZmFsc2UsICJtYXhfaXRl + ciI6IDI1LCAibWF4X3JwbSI6IG51bGwsICJmdW5jdGlvbl9jYWxsaW5nX2xsbSI6ICIiLCAibGxt + IjogImdwdC00IiwgImRlbGVnYXRpb25fZW5hYmxlZD8iOiBmYWxzZSwgImFsbG93X2NvZGVfZXhl + Y3V0aW9uPyI6IGZhbHNlLCAibWF4X3JldHJ5X2xpbWl0IjogMiwgInRvb2xzX25hbWVzIjogW119 + XUr5AQoKY3Jld190YXNrcxLqAQrnAVt7ImtleSI6ICJkYTk1ZWJkYjM2ZTRjZGY5MmRmNmE2ZGQx + NmJjZWUwZSIsICJpZCI6ICJhNGUwYjM1Ny0zMDBlLTQ0MjMtYTU1My0yZTZlMWQxODg1M2MiLCAi + YXN5bmNfZXhlY3V0aW9uPyI6IGZhbHNlLCAiaHVtYW5faW5wdXQ/IjogZmFsc2UsICJhZ2VudF9y + b2xlIjogInRlc3QiLCAiYWdlbnRfa2V5IjogIjk3NmY4ZjUwYWNjZmViYTIyM2U0OWM0MmIxNmU5 + OWU2IiwgInRvb2xzX25hbWVzIjogW119XXoCGAGFAQABAAASjgIKEC5jZek+sSlZP8lSwF5zTSYS + CIHcVJhIpsWuKgxUYXNrIENyZWF0ZWQwATl9vnrqvKsiGEFgjnvqvKsiGEouCghjcmV3X2tleRIi + CiBmZWIxZTIxYjMyNTZjNTlhNjQ3MTUyYWZkZDY2MzIyZUoxCgdjcmV3X2lkEiYKJDZjNjI2YTJm + LTk4ZGUtNDY4MC05YmE0LTU1ZWRjN2Y4OGJlM0ouCgh0YXNrX2tleRIiCiBkYTk1ZWJkYjM2ZTRj + ZGY5MmRmNmE2ZGQxNmJjZWUwZUoxCgd0YXNrX2lkEiYKJGE0ZTBiMzU3LTMwMGUtNDQyMy1hNTUz + LTJlNmUxZDE4ODUzY3oCGAGFAQABAAA= + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '1790' + Content-Type: + - application/x-protobuf + User-Agent: + - OTel-OTLP-Exporter-Python/1.27.0 + method: POST + uri: https://telemetry.crewai.com:4319/v1/traces + response: + body: + string: "\n\0" + headers: + Content-Length: + - '2' + Content-Type: + - application/x-protobuf + Date: + - Sun, 09 Feb 2025 22:47:14 GMT + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "system", "content": "You are test. test\nYour personal + goal is: test\nTo give my best complete final answer to the task respond using + the exact following format:\n\nThought: I now can give a great answer\nFinal + Answer: Your final answer must be the great and the most complete as possible, + it must be outcome described.\n\nI MUST use these formats, my job depends on + it!"}, {"role": "user", "content": "\nCurrent Task: test\n\nThis is the expected + criteria for your final answer: test output\nyou MUST return the actual complete + content as the final answer, not a summary.\n\nBegin! This is VERY important + to you, use the tools available and give your best Final Answer, your job depends + on it!\n\nThought:"}], "model": "gpt-4", "stop": ["\nObservation:"]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + authorization: + - Bearer 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 + connection: + - keep-alive + content-length: + - '780' + content-type: + - application/json + cookie: + - __cf_bm=p1aGVyahvfLAvEwvbX0FMmrN5o18PpVAu2dG_dTgMSU-1739141229-1.0.1.1-_q7aCslZTr11IMFZ81VgyuqsGiqTARFPANUvBEWM_0dZdb97Py78KE1omxdNv5F1pFKoWUqA1kEF2wzQ2wz4aA; + _cfuvid=bsF0jwE67cS.ywAaQU59jKPFC03S1dvynClHm_wTQik-1739141229143-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.61.0 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.61.0 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + content: "{\n \"id\": \"chatcmpl-AzAN0cgAktzQnGukedPNpZsTy461c\",\n \"object\": + \"chat.completion\",\n \"created\": 1739141234,\n \"model\": \"gpt-4-0613\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"I have an understanding of the task at + hand and am ready to provide an in-depth and comprehensive answer.\\n\\nFinal + Answer: As per the requirement of the task to provide a complete output, I am + returning this test output as my final answer. It is not a summary, but rather + a full and comprehensive response that fully addresses the question and expectations + set forth. Your test output is ready.\",\n \"refusal\": null\n },\n + \ \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n ],\n + \ \"usage\": {\n \"prompt_tokens\": 149,\n \"completion_tokens\": 77,\n + \ \"total_tokens\": 226,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n + \ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": null\n}\n" + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 90f76669bd9eba33-SEA + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Sun, 09 Feb 2025 22:47:17 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '3379' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '1000000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '999822' + x-ratelimit-reset-requests: + - 6ms + x-ratelimit-reset-tokens: + - 10ms + x-request-id: + - req_977371ae262154885689d766016ed132 + http_version: HTTP/1.1 + status_code: 200 +- request: + body: '{"messages": [{"role": "system", "content": "You are Task Execution Evaluator. + Evaluator agent for crew evaluation with precise capabilities to evaluate the + performance of the agents in the crew based on the tasks they have performed\nYour + personal goal is: Your goal is to evaluate the performance of the agents in + the crew based on the tasks they have performed using score from 1 to 10 evaluating + on completion, quality, and overall performance.\nTo give my best complete final + answer to the task respond using the exact following format:\n\nThought: I now + can give a great answer\nFinal Answer: Your final answer must be the great and + the most complete as possible, it must be outcome described.\n\nI MUST use these + formats, my job depends on it!"}, {"role": "user", "content": "\nCurrent Task: + Based on the task description and the expected output, compare and evaluate + the performance of the agents in the crew based on the Task Output they have + performed using score from 1 to 10 evaluating on completion, quality, and overall + performance.task_description: test task_expected_output: test output agent: + test agent_goal: test Task Output: As per the requirement of the task to provide + a complete output, I am returning this test output as my final answer. It is + not a summary, but rather a full and comprehensive response that fully addresses + the question and expectations set forth. Your test output is ready.\n\nThis + is the expected criteria for your final answer: Evaluation Score from 1 to 10 + based on the performance of the agents on the tasks\nyou MUST return the actual + complete content as the final answer, not a summary.\nEnsure your final answer + contains only the content in the following format: {\n \"quality\": float\n}\n\nEnsure + the final output does not include any code block markers like ```json or ```python.\n\nBegin! + This is VERY important to you, use the tools available and give your best Final + Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4", "stop": ["\nObservation:"]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + authorization: + - Bearer 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 + connection: + - keep-alive + content-length: + - '2017' + content-type: + - application/json + cookie: + - __cf_bm=p1aGVyahvfLAvEwvbX0FMmrN5o18PpVAu2dG_dTgMSU-1739141229-1.0.1.1-_q7aCslZTr11IMFZ81VgyuqsGiqTARFPANUvBEWM_0dZdb97Py78KE1omxdNv5F1pFKoWUqA1kEF2wzQ2wz4aA; + _cfuvid=bsF0jwE67cS.ywAaQU59jKPFC03S1dvynClHm_wTQik-1739141229143-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.61.0 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.61.0 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + content: "{\n \"id\": \"chatcmpl-AzAN3PPgDvH836sMJBXCVRc3im99S\",\n \"object\": + \"chat.completion\",\n \"created\": 1739141237,\n \"model\": \"gpt-4-0613\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"Based on the information provided, the + agent appears to have completed the task, providing an output that they have + defined as 'full and comprehensive'. It appears that the agent has attempted + to meet all the expectations of the task description and has reached the goal + of returning a 'test output' as the final answer.\\n\\nFinal Answer: Considering + these aspects, for task completion, the agent receives a 10 as they have successfully + generated an output. For quality, the agent again receives a significant score + of 10, because the full and comprehensive nature of the output matches the task's + expectations. Finally, taking into account both the completion and quality aspects, + the overall performance evaluation is also 10, recognizing the perfect alignment + between the task's expected output and the output delivered by the agent. \\nTherefore, + the final evaluation score can be summarized in the below format:\\n{\\n\\\"completion\\\": + 10,\\n\\\"quality\\\": 10,\\n\\\"overall performance\\\": 10\\n}\",\n \"refusal\": + null\n },\n \"logprobs\": null,\n \"finish_reason\": \"stop\"\n + \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 385,\n \"completion_tokens\": + 190,\n \"total_tokens\": 575,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n + \ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": null\n}\n" + headers: + CF-RAY: + - 90f7667f9bfdba33-SEA + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Sun, 09 Feb 2025 22:47:24 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '6909' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '1000000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '999515' + x-ratelimit-reset-requests: + - 6ms + x-ratelimit-reset-tokens: + - 29ms + x-request-id: + - req_3e283ae8c1cd132001ecf2d96198bbd6 + http_version: HTTP/1.1 + status_code: 200 +- request: + body: '{"messages": [{"role": "system", "content": "You are test. test\nYour personal + goal is: test\nTo give my best complete final answer to the task respond using + the exact following format:\n\nThought: I now can give a great answer\nFinal + Answer: Your final answer must be the great and the most complete as possible, + it must be outcome described.\n\nI MUST use these formats, my job depends on + it!"}, {"role": "user", "content": "\nCurrent Task: test\n\nThis is the expected + criteria for your final answer: test output\nyou MUST return the actual complete + content as the final answer, not a summary.\n\nBegin! This is VERY important + to you, use the tools available and give your best Final Answer, your job depends + on it!\n\nThought:"}], "model": "gpt-4", "stop": ["\nObservation:"]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + authorization: + - Bearer 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 + connection: + - keep-alive + content-length: + - '780' + content-type: + - application/json + cookie: + - __cf_bm=p1aGVyahvfLAvEwvbX0FMmrN5o18PpVAu2dG_dTgMSU-1739141229-1.0.1.1-_q7aCslZTr11IMFZ81VgyuqsGiqTARFPANUvBEWM_0dZdb97Py78KE1omxdNv5F1pFKoWUqA1kEF2wzQ2wz4aA; + _cfuvid=bsF0jwE67cS.ywAaQU59jKPFC03S1dvynClHm_wTQik-1739141229143-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.61.0 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.61.0 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + content: "{\n \"id\": \"chatcmpl-AzANAznjrZdppuFIsRnEouHG8WuM0\",\n \"object\": + \"chat.completion\",\n \"created\": 1739141244,\n \"model\": \"gpt-4-0613\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"I am ready to prepare my final answer + based on the test output criteria provided\\nFinal Answer: I have followed all + the instructions provided in the task to the best of my ability, and the outcome + of the test is as described in the final answer. It is complete, detailed, and + accurate.\",\n \"refusal\": null\n },\n \"logprobs\": null,\n + \ \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 149,\n \"completion_tokens\": 59,\n \"total_tokens\": 208,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": null\n}\n" + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 90f766abaaa3ba33-SEA + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Sun, 09 Feb 2025 22:47:27 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '2533' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '1000000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '999822' + x-ratelimit-reset-requests: + - 6ms + x-ratelimit-reset-tokens: + - 10ms + x-request-id: + - req_6ea4c81627695f58de56727aa8d8cc59 + http_version: HTTP/1.1 + status_code: 200 +- request: + body: !!binary | + CvwNCiQKIgoMc2VydmljZS5uYW1lEhIKEGNyZXdBSS10ZWxlbWV0cnkS0w0KEgoQY3Jld2FpLnRl + bGVtZXRyeRKaAgoQGJwIgEdh/Dq2y8ue+Gl/XxIInTNNpEL8yjQqG0NyZXcgSW5kaXZpZHVhbCBU + ZXN0IFJlc3VsdDABOaAPzl6/qyIYQYnY6V6/qyIYShsKDmNyZXdhaV92ZXJzaW9uEgkKBzAuMTAw + LjFKLgoIY3Jld19rZXkSIgogZmViMWUyMWIzMjU2YzU5YTY0NzE1MmFmZGQ2NjMyMmVKMQoHY3Jl + d19pZBImCiQ2YzYyNmEyZi05OGRlLTQ2ODAtOWJhNC01NWVkYzdmODhiZTNKEQoHcXVhbGl0eRIG + CgQxMC4wShcKCWV4ZWNfdGltZRIKCggzLjUwMzUyNkoVCgptb2RlbF9uYW1lEgcKBWdwdC00egIY + AYUBAAEAABL5AQoQsfCFg6/ZkEo2LShWV3X+WhII4I9o90lQzxMqE0NyZXcgVGVzdCBFeGVjdXRp + b24wATmWhUlfv6siGEFeYlZfv6siGEobCg5jcmV3YWlfdmVyc2lvbhIJCgcwLjEwMC4xSi4KCGNy + ZXdfa2V5EiIKIGZlYjFlMjFiMzI1NmM1OWE2NDcxNTJhZmRkNjYzMjJlSjEKB2NyZXdfaWQSJgok + ZDU2ZjljMWEtYmRkMS00MDI3LWI1ZjctMzg1ZGVlMWU2YjljShEKCml0ZXJhdGlvbnMSAwoBMUoV + Cgptb2RlbF9uYW1lEgcKBWdwdC00egIYAYUBAAEAABKSBwoQlucrHD/mwnCU8Dl9QKzgYhIIGyix + 8K7RcoAqDENyZXcgQ3JlYXRlZDABOTQxXF+/qyIYQamXaV+/qyIYShsKDmNyZXdhaV92ZXJzaW9u + EgkKBzAuMTAwLjFKGgoOcHl0aG9uX3ZlcnNpb24SCAoGMy4xMi43Si4KCGNyZXdfa2V5EiIKIGZl + YjFlMjFiMzI1NmM1OWE2NDcxNTJhZmRkNjYzMjJlSjEKB2NyZXdfaWQSJgokZDU2ZjljMWEtYmRk + MS00MDI3LWI1ZjctMzg1ZGVlMWU2YjljShwKDGNyZXdfcHJvY2VzcxIMCgpzZXF1ZW50aWFsShEK + C2NyZXdfbWVtb3J5EgIQAEoaChRjcmV3X251bWJlcl9vZl90YXNrcxICGAFKGwoVY3Jld19udW1i + ZXJfb2ZfYWdlbnRzEgIYAUrFAgoLY3Jld19hZ2VudHMStQIKsgJbeyJrZXkiOiAiOTc2ZjhmNTBh + Y2NmZWJhMjIzZTQ5YzQyYjE2ZTk5ZTYiLCAiaWQiOiAiMzcwMzA5YTQtMDU5OS00MWVlLWFiMTgt + YWE1ZmQ1Mjg2ZGQ1IiwgInJvbGUiOiAidGVzdCIsICJ2ZXJib3NlPyI6IGZhbHNlLCAibWF4X2l0 + ZXIiOiAyNSwgIm1heF9ycG0iOiBudWxsLCAiZnVuY3Rpb25fY2FsbGluZ19sbG0iOiAiIiwgImxs + bSI6ICJncHQtNCIsICJkZWxlZ2F0aW9uX2VuYWJsZWQ/IjogZmFsc2UsICJhbGxvd19jb2RlX2V4 + ZWN1dGlvbj8iOiBmYWxzZSwgIm1heF9yZXRyeV9saW1pdCI6IDIsICJ0b29sc19uYW1lcyI6IFtd + fV1K+QEKCmNyZXdfdGFza3MS6gEK5wFbeyJrZXkiOiAiZGE5NWViZGIzNmU0Y2RmOTJkZjZhNmRk + MTZiY2VlMGUiLCAiaWQiOiAiZTBmNDgzNjAtYzNjNS00ZGY1LThkZjEtNDg2ZTc4OWNiZWUyIiwg + ImFzeW5jX2V4ZWN1dGlvbj8iOiBmYWxzZSwgImh1bWFuX2lucHV0PyI6IGZhbHNlLCAiYWdlbnRf + cm9sZSI6ICJ0ZXN0IiwgImFnZW50X2tleSI6ICI5NzZmOGY1MGFjY2ZlYmEyMjNlNDljNDJiMTZl + OTllNiIsICJ0b29sc19uYW1lcyI6IFtdfV16AhgBhQEAAQAAEo4CChA4OLnKHp32b0EUM2g5rs+r + EgjifMpu5dQ6xCoMVGFzayBDcmVhdGVkMAE5x1d3X7+rIhhBMxF4X7+rIhhKLgoIY3Jld19rZXkS + IgogZmViMWUyMWIzMjU2YzU5YTY0NzE1MmFmZGQ2NjMyMmVKMQoHY3Jld19pZBImCiRkNTZmOWMx + YS1iZGQxLTQwMjctYjVmNy0zODVkZWUxZTZiOWNKLgoIdGFza19rZXkSIgogZGE5NWViZGIzNmU0 + Y2RmOTJkZjZhNmRkMTZiY2VlMGVKMQoHdGFza19pZBImCiRlMGY0ODM2MC1jM2M1LTRkZjUtOGRm + MS00ODZlNzg5Y2JlZTJ6AhgBhQEAAQAA + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '1791' + Content-Type: + - application/x-protobuf + User-Agent: + - OTel-OTLP-Exporter-Python/1.27.0 + method: POST + uri: https://telemetry.crewai.com:4319/v1/traces + response: + body: + string: "\n\0" + headers: + Content-Length: + - '2' + Content-Type: + - application/x-protobuf + Date: + - Sun, 09 Feb 2025 22:47:29 GMT + status: + code: 200 + message: OK +- request: + body: '{"messages": [{"role": "system", "content": "You are Task Execution Evaluator. + Evaluator agent for crew evaluation with precise capabilities to evaluate the + performance of the agents in the crew based on the tasks they have performed\nYour + personal goal is: Your goal is to evaluate the performance of the agents in + the crew based on the tasks they have performed using score from 1 to 10 evaluating + on completion, quality, and overall performance.\nTo give my best complete final + answer to the task respond using the exact following format:\n\nThought: I now + can give a great answer\nFinal Answer: Your final answer must be the great and + the most complete as possible, it must be outcome described.\n\nI MUST use these + formats, my job depends on it!"}, {"role": "user", "content": "\nCurrent Task: + Based on the task description and the expected output, compare and evaluate + the performance of the agents in the crew based on the Task Output they have + performed using score from 1 to 10 evaluating on completion, quality, and overall + performance.task_description: test task_expected_output: test output agent: + test agent_goal: test Task Output: I have followed all the instructions provided + in the task to the best of my ability, and the outcome of the test is as described + in the final answer. It is complete, detailed, and accurate.\n\nThis is the + expected criteria for your final answer: Evaluation Score from 1 to 10 based + on the performance of the agents on the tasks\nyou MUST return the actual complete + content as the final answer, not a summary.\nEnsure your final answer contains + only the content in the following format: {\n \"quality\": float\n}\n\nEnsure + the final output does not include any code block markers like ```json or ```python.\n\nBegin! + This is VERY important to you, use the tools available and give your best Final + Answer, your job depends on it!\n\nThought:"}], "model": "gpt-4", "stop": ["\nObservation:"]}' + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + authorization: + - Bearer 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 + connection: + - keep-alive + content-length: + - '1935' + content-type: + - application/json + cookie: + - __cf_bm=p1aGVyahvfLAvEwvbX0FMmrN5o18PpVAu2dG_dTgMSU-1739141229-1.0.1.1-_q7aCslZTr11IMFZ81VgyuqsGiqTARFPANUvBEWM_0dZdb97Py78KE1omxdNv5F1pFKoWUqA1kEF2wzQ2wz4aA; + _cfuvid=bsF0jwE67cS.ywAaQU59jKPFC03S1dvynClHm_wTQik-1739141229143-0.0.1.1-604800000 + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.61.0 + x-stainless-arch: + - x64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - Linux + x-stainless-package-version: + - 1.61.0 + x-stainless-raw-response: + - 'true' + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.7 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + content: "{\n \"id\": \"chatcmpl-AzANDGSiRIu1XO56ZMfLuO2SuL4l0\",\n \"object\": + \"chat.completion\",\n \"created\": 1739141247,\n \"model\": \"gpt-4-0613\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"Looking at the task output provided by + the agent, the agent has expressed a high level of confidence in their ability + to follow the instructions provided to the best of their ability. The agent + seems to have executed the task with detailed attention and accuracy.\\n\\nI + need to evaluate both the quality and the overall performance of the agent keeping + in mind the task description and the expected output. Given the agent's output + and goal, I can deduce the quality of their work as well as their overall performance.\\n\\nFinal + Answer: \\n{\\n \\\"quality\\\": 8.5\\n}\",\n \"refusal\": null\n },\n + \ \"logprobs\": null,\n \"finish_reason\": \"stop\"\n }\n ],\n + \ \"usage\": {\n \"prompt_tokens\": 372,\n \"completion_tokens\": 112,\n + \ \"total_tokens\": 484,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": {\n + \ \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": null\n}\n" + headers: + CF-RAY: + - 90f766bc4d87ba33-SEA + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Sun, 09 Feb 2025 22:47:32 GMT + Server: + - cloudflare + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - crewai-iuxna1 + openai-processing-ms: + - '5056' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '1000000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '999535' + x-ratelimit-reset-requests: + - 6ms + x-ratelimit-reset-tokens: + - 27ms + x-request-id: + - req_ff2926b015823a70e2173c71f8d63209 + http_version: HTTP/1.1 + status_code: 200 +version: 1