mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-04-13 06:23:03 +00:00
Compare commits
8 Commits
devin/1775
...
1.14.0
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
25eb4adc49 | ||
|
|
1534ba202d | ||
|
|
868416bfe0 | ||
|
|
a5df7c798c | ||
|
|
5958a16ade | ||
|
|
9325e2f6a4 | ||
|
|
25e7ca03c4 | ||
|
|
5b4a0e8734 |
@@ -4,6 +4,77 @@ description: "تحديثات المنتج والتحسينات وإصلاحات
|
||||
icon: "clock"
|
||||
mode: "wide"
|
||||
---
|
||||
<Update label="7 أبريل 2026">
|
||||
## v1.14.0
|
||||
|
||||
[عرض الإصدار على GitHub](https://github.com/crewAIInc/crewAI/releases/tag/1.14.0)
|
||||
|
||||
## ما الذي تغير
|
||||
|
||||
### الميزات
|
||||
- إضافة أوامر CLI لقائمة/معلومات نقاط التحقق
|
||||
- إضافة guardrail_type و name لتمييز التتبع
|
||||
- إضافة SqliteProvider لتخزين نقاط التحقق
|
||||
- إضافة CheckpointConfig للتسجيل التلقائي لنقاط التحقق
|
||||
- تنفيذ تسجيل حالة وقت التشغيل، نظام الأحداث، وإعادة هيكلة المنفذ
|
||||
|
||||
### إصلاحات الأخطاء
|
||||
- إضافة حماية من SSRF وتجاوز المسار
|
||||
- إضافة التحقق من المسار وعنوان URL لأدوات RAG
|
||||
- استبعاد متجهات التضمين من تسلسل الذاكرة لتوفير الرموز
|
||||
- التأكد من وجود دليل الإخراج قبل الكتابة في قالب التدفق
|
||||
- رفع litellm إلى >=1.83.0 لمعالجة CVE-2026-35030
|
||||
- إزالة حقل فهرسة SEO الذي يتسبب في عرض الصفحة العربية بشكل غير صحيح
|
||||
|
||||
### الوثائق
|
||||
- تحديث سجل التغييرات والإصدار لـ v1.14.0
|
||||
- تحديث أدلة البدء السريع والتثبيت لتحسين الوضوح
|
||||
- إضافة قسم مزودي التخزين، تصدير JsonProvider
|
||||
- إضافة دليل علامة AMP التدريبية
|
||||
|
||||
### إعادة الهيكلة
|
||||
- تنظيف واجهة برمجة تطبيقات نقاط التحقق
|
||||
- إزالة CodeInterpreterTool وإهمال معلمات تنفيذ الكود
|
||||
|
||||
## المساهمون
|
||||
|
||||
@alex-clawd, @github-actions[bot], @greysonlalonde, @iris-clawd, @joaomdmoura, @lorenzejay, @lucasgomide
|
||||
|
||||
</Update>
|
||||
|
||||
<Update label="7 أبريل 2026">
|
||||
## v1.14.0a4
|
||||
|
||||
[عرض الإصدار على GitHub](https://github.com/crewAIInc/crewAI/releases/tag/1.14.0a4)
|
||||
|
||||
## ما الذي تغير
|
||||
|
||||
### الميزات
|
||||
- إضافة guardrail_type و name لتمييز الآثار
|
||||
- إضافة SqliteProvider لتخزين نقاط التحقق
|
||||
- إضافة CheckpointConfig للتخزين التلقائي لنقاط التحقق
|
||||
- تنفيذ نقاط التحقق لحالة التشغيل، نظام الأحداث، وإعادة هيكلة المنفذ
|
||||
|
||||
### إصلاحات الأخطاء
|
||||
- استبعاد متجهات التضمين من تسلسل الذاكرة لتوفير الرموز
|
||||
- رفع litellm إلى >=1.83.0 لمعالجة CVE-2026-35030
|
||||
|
||||
### الوثائق
|
||||
- تحديث أدلة البدء السريع والتثبيت لتحسين الوضوح
|
||||
- إضافة قسم مقدمي التخزين وتصدير JsonProvider
|
||||
|
||||
### الأداء
|
||||
- استخدام JSONB لعمود بيانات نقاط التحقق
|
||||
|
||||
### إعادة الهيكلة
|
||||
- إزالة CodeInterpreterTool وإهمال معلمات تنفيذ الكود
|
||||
|
||||
## المساهمون
|
||||
|
||||
@alex-clawd, @github-actions[bot], @greysonlalonde, @joaomdmoura, @lorenzejay, @lucasgomide
|
||||
|
||||
</Update>
|
||||
|
||||
<Update label="6 أبريل 2026">
|
||||
## v1.14.0a3
|
||||
|
||||
|
||||
@@ -250,16 +250,12 @@ analysis_agent = Agent(
|
||||
|
||||
#### تنفيذ الكود
|
||||
|
||||
- `allow_code_execution`: يجب أن يكون True لتشغيل الكود
|
||||
- `code_execution_mode`:
|
||||
- `"safe"`: يستخدم Docker (موصى به للإنتاج)
|
||||
- `"unsafe"`: تنفيذ مباشر (استخدم فقط في بيئات موثوقة)
|
||||
<Warning>
|
||||
`allow_code_execution` و`code_execution_mode` مهجوران. تمت إزالة `CodeInterpreterTool` من `crewai-tools`. استخدم خدمة بيئة معزولة مخصصة مثل [E2B](https://e2b.dev) أو [Modal](https://modal.com) لتنفيذ الكود بأمان.
|
||||
</Warning>
|
||||
|
||||
<Note>
|
||||
يشغّل هذا صورة Docker افتراضية. إذا أردت تهيئة صورة Docker،
|
||||
راجع أداة Code Interpreter في قسم الأدوات. أضف أداة
|
||||
مفسر الكود كأداة في معامل أداة الوكيل.
|
||||
</Note>
|
||||
- `allow_code_execution` _(مهجور)_: كان يُمكّن تنفيذ الكود المدمج عبر `CodeInterpreterTool`.
|
||||
- `code_execution_mode` _(مهجور)_: كان يتحكم في وضع التنفيذ (`"safe"` لـ Docker، `"unsafe"` للتنفيذ المباشر).
|
||||
|
||||
#### الميزات المتقدمة
|
||||
|
||||
@@ -332,9 +328,9 @@ print(result.raw)
|
||||
|
||||
### الأمان وتنفيذ الكود
|
||||
|
||||
- عند استخدام `allow_code_execution`، كن حذرًا مع مدخلات المستخدم وتحقق منها دائمًا
|
||||
- استخدم `code_execution_mode: "safe"` (Docker) في بيئات الإنتاج
|
||||
- فكّر في تعيين حدود `max_execution_time` مناسبة لمنع الحلقات اللانهائية
|
||||
<Warning>
|
||||
`allow_code_execution` و`code_execution_mode` مهجوران وتمت إزالة `CodeInterpreterTool`. استخدم خدمة بيئة معزولة مخصصة مثل [E2B](https://e2b.dev) أو [Modal](https://modal.com) لتنفيذ الكود بأمان.
|
||||
</Warning>
|
||||
|
||||
### تحسين الأداء
|
||||
|
||||
|
||||
@@ -39,7 +39,7 @@ crew = Crew(
|
||||
agents=[...],
|
||||
tasks=[...],
|
||||
checkpoint=CheckpointConfig(
|
||||
directory="./my_checkpoints",
|
||||
location="./my_checkpoints",
|
||||
on_events=["task_completed", "crew_kickoff_completed"],
|
||||
max_checkpoints=5,
|
||||
),
|
||||
@@ -50,7 +50,7 @@ crew = Crew(
|
||||
|
||||
| الحقل | النوع | الافتراضي | الوصف |
|
||||
|:------|:------|:----------|:------|
|
||||
| `directory` | `str` | `"./.checkpoints"` | مسار ملفات نقاط الحفظ |
|
||||
| `location` | `str` | `"./.checkpoints"` | مسار ملفات نقاط الحفظ |
|
||||
| `on_events` | `list[str]` | `["task_completed"]` | أنواع الأحداث التي تُطلق نقطة حفظ |
|
||||
| `provider` | `BaseProvider` | `JsonProvider()` | واجهة التخزين |
|
||||
| `max_checkpoints` | `int \| None` | `None` | الحد الأقصى للملفات؛ يتم حذف الأقدم أولاً |
|
||||
@@ -95,7 +95,7 @@ result = crew.kickoff() # يستأنف من اخر مهمة مكتملة
|
||||
crew = Crew(
|
||||
agents=[researcher, writer],
|
||||
tasks=[research_task, write_task, review_task],
|
||||
checkpoint=CheckpointConfig(directory="./crew_cp"),
|
||||
checkpoint=CheckpointConfig(location="./crew_cp"),
|
||||
)
|
||||
```
|
||||
|
||||
@@ -118,7 +118,7 @@ class MyFlow(Flow):
|
||||
|
||||
flow = MyFlow(
|
||||
checkpoint=CheckpointConfig(
|
||||
directory="./flow_cp",
|
||||
location="./flow_cp",
|
||||
on_events=["method_execution_finished"],
|
||||
),
|
||||
)
|
||||
@@ -137,7 +137,7 @@ agent = Agent(
|
||||
goal="Research topics",
|
||||
backstory="Expert researcher",
|
||||
checkpoint=CheckpointConfig(
|
||||
directory="./agent_cp",
|
||||
location="./agent_cp",
|
||||
on_events=["lite_agent_execution_completed"],
|
||||
),
|
||||
)
|
||||
@@ -160,7 +160,7 @@ crew = Crew(
|
||||
agents=[...],
|
||||
tasks=[...],
|
||||
checkpoint=CheckpointConfig(
|
||||
directory="./my_checkpoints",
|
||||
location="./my_checkpoints",
|
||||
provider=JsonProvider(),
|
||||
max_checkpoints=5,
|
||||
),
|
||||
@@ -179,15 +179,12 @@ crew = Crew(
|
||||
agents=[...],
|
||||
tasks=[...],
|
||||
checkpoint=CheckpointConfig(
|
||||
directory="./.checkpoints.db",
|
||||
provider=SqliteProvider(max_checkpoints=50),
|
||||
location="./.checkpoints.db",
|
||||
provider=SqliteProvider(),
|
||||
),
|
||||
)
|
||||
```
|
||||
|
||||
<Note>
|
||||
عند استخدام `SqliteProvider`، حقل `directory` هو مسار ملف قاعدة البيانات، وليس مجلدا.
|
||||
</Note>
|
||||
|
||||
## أنواع الأحداث
|
||||
|
||||
|
||||
@@ -7,6 +7,10 @@ mode: "wide"
|
||||
|
||||
# `CodeInterpreterTool`
|
||||
|
||||
<Warning>
|
||||
**مهجور:** تمت إزالة `CodeInterpreterTool` من `crewai-tools`. كما أن معاملَي `allow_code_execution` و`code_execution_mode` على `Agent` أصبحا مهجورَين. استخدم خدمة بيئة معزولة مخصصة — [E2B](https://e2b.dev) أو [Modal](https://modal.com) — لتنفيذ الكود بشكل آمن ومعزول.
|
||||
</Warning>
|
||||
|
||||
## الوصف
|
||||
|
||||
تمكّن `CodeInterpreterTool` وكلاء CrewAI من تنفيذ كود Python 3 الذي يولّدونه بشكل مستقل. هذه الوظيفة ذات قيمة خاصة لأنها تتيح للوكلاء إنشاء الكود وتنفيذه والحصول على النتائج واستخدام تلك المعلومات لاتخاذ القرارات والإجراءات اللاحقة.
|
||||
|
||||
@@ -74,3 +74,19 @@ tool = CSVSearchTool(
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
## الأمان
|
||||
|
||||
### التحقق من صحة المسارات
|
||||
|
||||
يتم التحقق من مسارات الملفات المقدمة لهذه الأداة مقابل مجلد العمل الحالي. يتم رفض المسارات التي تحل خارج مجلد العمل وإطلاق `ValueError`.
|
||||
|
||||
للسماح بالمسارات خارج مجلد العمل (مثلاً في الاختبارات أو خطوط الأنابيب الموثوقة)، عيّن متغير البيئة التالي:
|
||||
|
||||
```shell
|
||||
CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true
|
||||
```
|
||||
|
||||
### التحقق من صحة الروابط
|
||||
|
||||
يتم التحقق من مدخلات الروابط: يتم حظر مخطط `file://` والطلبات التي تستهدف نطاقات IP الخاصة أو المحجوزة لمنع هجمات تزوير الطلبات من جانب الخادم (SSRF).
|
||||
|
||||
@@ -68,3 +68,15 @@ tool = DirectorySearchTool(
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
## الأمان
|
||||
|
||||
### التحقق من صحة المسارات
|
||||
|
||||
يتم التحقق من مسارات المجلدات المقدمة لهذه الأداة مقابل مجلد العمل الحالي. يتم رفض المسارات التي تحل خارج مجلد العمل وإطلاق `ValueError`.
|
||||
|
||||
للسماح بالمسارات خارج مجلد العمل (مثلاً في الاختبارات أو خطوط الأنابيب الموثوقة)، عيّن متغير البيئة التالي:
|
||||
|
||||
```shell
|
||||
CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true
|
||||
```
|
||||
|
||||
@@ -73,3 +73,19 @@ tool = JSONSearchTool(
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
## الأمان
|
||||
|
||||
### التحقق من صحة المسارات
|
||||
|
||||
يتم التحقق من مسارات الملفات المقدمة لهذه الأداة مقابل مجلد العمل الحالي. يتم رفض المسارات التي تحل خارج مجلد العمل وإطلاق `ValueError`.
|
||||
|
||||
للسماح بالمسارات خارج مجلد العمل (مثلاً في الاختبارات أو خطوط الأنابيب الموثوقة)، عيّن متغير البيئة التالي:
|
||||
|
||||
```shell
|
||||
CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true
|
||||
```
|
||||
|
||||
### التحقق من صحة الروابط
|
||||
|
||||
يتم التحقق من مدخلات الروابط: يتم حظر مخطط `file://` والطلبات التي تستهدف نطاقات IP الخاصة أو المحجوزة لمنع هجمات تزوير الطلبات من جانب الخادم (SSRF).
|
||||
|
||||
@@ -105,3 +105,19 @@ tool = PDFSearchTool(
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
## الأمان
|
||||
|
||||
### التحقق من صحة المسارات
|
||||
|
||||
يتم التحقق من مسارات الملفات المقدمة لهذه الأداة مقابل مجلد العمل الحالي. يتم رفض المسارات التي تحل خارج مجلد العمل وإطلاق `ValueError`.
|
||||
|
||||
للسماح بالمسارات خارج مجلد العمل (مثلاً في الاختبارات أو خطوط الأنابيب الموثوقة)، عيّن متغير البيئة التالي:
|
||||
|
||||
```shell
|
||||
CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true
|
||||
```
|
||||
|
||||
### التحقق من صحة الروابط
|
||||
|
||||
يتم التحقق من مدخلات الروابط: يتم حظر مخطط `file://` والطلبات التي تستهدف نطاقات IP الخاصة أو المحجوزة لمنع هجمات تزوير الطلبات من جانب الخادم (SSRF).
|
||||
|
||||
3361
docs/docs.json
3361
docs/docs.json
File diff suppressed because it is too large
Load Diff
@@ -4,6 +4,77 @@ description: "Product updates, improvements, and bug fixes for CrewAI"
|
||||
icon: "clock"
|
||||
mode: "wide"
|
||||
---
|
||||
<Update label="Apr 07, 2026">
|
||||
## v1.14.0
|
||||
|
||||
[View release on GitHub](https://github.com/crewAIInc/crewAI/releases/tag/1.14.0)
|
||||
|
||||
## What's Changed
|
||||
|
||||
### Features
|
||||
- Add checkpoint list/info CLI commands
|
||||
- Add guardrail_type and name to distinguish traces
|
||||
- Add SqliteProvider for checkpoint storage
|
||||
- Add CheckpointConfig for automatic checkpointing
|
||||
- Implement runtime state checkpointing, event system, and executor refactor
|
||||
|
||||
### Bug Fixes
|
||||
- Add SSRF and path traversal protections
|
||||
- Add path and URL validation to RAG tools
|
||||
- Exclude embedding vectors from memory serialization to save tokens
|
||||
- Ensure output directory exists before writing in flow template
|
||||
- Bump litellm to >=1.83.0 to address CVE-2026-35030
|
||||
- Remove SEO indexing field causing incorrect Arabic page rendering
|
||||
|
||||
### Documentation
|
||||
- Update changelog and version for v1.14.0
|
||||
- Update quickstart and installation guides for improved clarity
|
||||
- Add storage providers section, export JsonProvider
|
||||
- Add AMP Training Tab guide
|
||||
|
||||
### Refactoring
|
||||
- Clean up checkpoint API
|
||||
- Remove CodeInterpreterTool and deprecate code execution parameters
|
||||
|
||||
## Contributors
|
||||
|
||||
@alex-clawd, @github-actions[bot], @greysonlalonde, @iris-clawd, @joaomdmoura, @lorenzejay, @lucasgomide
|
||||
|
||||
</Update>
|
||||
|
||||
<Update label="Apr 07, 2026">
|
||||
## v1.14.0a4
|
||||
|
||||
[View release on GitHub](https://github.com/crewAIInc/crewAI/releases/tag/1.14.0a4)
|
||||
|
||||
## What's Changed
|
||||
|
||||
### Features
|
||||
- Add guardrail_type and name to distinguish traces
|
||||
- Add SqliteProvider for checkpoint storage
|
||||
- Add CheckpointConfig for automatic checkpointing
|
||||
- Implement runtime state checkpointing, event system, and executor refactor
|
||||
|
||||
### Bug Fixes
|
||||
- Exclude embedding vectors from memory serialization to save tokens
|
||||
- Bump litellm to >=1.83.0 to address CVE-2026-35030
|
||||
|
||||
### Documentation
|
||||
- Update quickstart and installation guides for improved clarity
|
||||
- Add storage providers section and export JsonProvider
|
||||
|
||||
### Performance
|
||||
- Use JSONB for checkpoint data column
|
||||
|
||||
### Refactoring
|
||||
- Remove CodeInterpreterTool and deprecate code execution params
|
||||
|
||||
## Contributors
|
||||
|
||||
@alex-clawd, @github-actions[bot], @greysonlalonde, @joaomdmoura, @lorenzejay, @lucasgomide
|
||||
|
||||
</Update>
|
||||
|
||||
<Update label="Apr 06, 2026">
|
||||
## v1.14.0a3
|
||||
|
||||
|
||||
@@ -308,16 +308,12 @@ multimodal_agent = Agent(
|
||||
|
||||
#### Code Execution
|
||||
|
||||
- `allow_code_execution`: Must be True to run code
|
||||
- `code_execution_mode`:
|
||||
- `"safe"`: Uses Docker (recommended for production)
|
||||
- `"unsafe"`: Direct execution (use only in trusted environments)
|
||||
<Warning>
|
||||
`allow_code_execution` and `code_execution_mode` are deprecated. `CodeInterpreterTool` has been removed from `crewai-tools`. Use a dedicated sandbox service such as [E2B](https://e2b.dev) or [Modal](https://modal.com) for secure code execution.
|
||||
</Warning>
|
||||
|
||||
<Note>
|
||||
This runs a default Docker image. If you want to configure the docker image,
|
||||
then check out the Code Interpreter Tool in the tools section. Add the code
|
||||
interpreter tool to the agent via its tools parameter.
|
||||
</Note>
|
||||
- `allow_code_execution` _(deprecated)_: Previously enabled built-in code execution via `CodeInterpreterTool`.
|
||||
- `code_execution_mode` _(deprecated)_: Previously controlled execution mode (`"safe"` for Docker, `"unsafe"` for direct execution).
|
||||
|
||||
#### Advanced Features
|
||||
|
||||
@@ -667,9 +663,9 @@ asyncio.run(main())
|
||||
|
||||
### Security and Code Execution
|
||||
|
||||
- When using `allow_code_execution`, be cautious with user input and always validate it
|
||||
- Use `code_execution_mode: "safe"` (Docker) in production environments
|
||||
- Consider setting appropriate `max_execution_time` limits to prevent infinite loops
|
||||
<Warning>
|
||||
`allow_code_execution` and `code_execution_mode` are deprecated and `CodeInterpreterTool` has been removed. Use a dedicated sandbox service such as [E2B](https://e2b.dev) or [Modal](https://modal.com) for secure code execution.
|
||||
</Warning>
|
||||
|
||||
### Performance Optimization
|
||||
|
||||
|
||||
@@ -39,7 +39,7 @@ crew = Crew(
|
||||
agents=[...],
|
||||
tasks=[...],
|
||||
checkpoint=CheckpointConfig(
|
||||
directory="./my_checkpoints",
|
||||
location="./my_checkpoints",
|
||||
on_events=["task_completed", "crew_kickoff_completed"],
|
||||
max_checkpoints=5,
|
||||
),
|
||||
@@ -50,10 +50,10 @@ crew = Crew(
|
||||
|
||||
| Field | Type | Default | Description |
|
||||
|:------|:-----|:--------|:------------|
|
||||
| `directory` | `str` | `"./.checkpoints"` | Filesystem path for checkpoint files |
|
||||
| `location` | `str` | `"./.checkpoints"` | Storage destination — a directory for `JsonProvider`, a database file path for `SqliteProvider` |
|
||||
| `on_events` | `list[str]` | `["task_completed"]` | Event types that trigger a checkpoint |
|
||||
| `provider` | `BaseProvider` | `JsonProvider()` | Storage backend |
|
||||
| `max_checkpoints` | `int \| None` | `None` | Max files to keep; oldest pruned first |
|
||||
| `max_checkpoints` | `int \| None` | `None` | Max checkpoints to keep. Oldest are pruned after each write. Pruning is handled by the provider. |
|
||||
|
||||
### Inheritance and Opt-Out
|
||||
|
||||
@@ -95,7 +95,7 @@ The restored crew skips already-completed tasks and resumes from the first incom
|
||||
crew = Crew(
|
||||
agents=[researcher, writer],
|
||||
tasks=[research_task, write_task, review_task],
|
||||
checkpoint=CheckpointConfig(directory="./crew_cp"),
|
||||
checkpoint=CheckpointConfig(location="./crew_cp"),
|
||||
)
|
||||
```
|
||||
|
||||
@@ -118,7 +118,7 @@ class MyFlow(Flow):
|
||||
|
||||
flow = MyFlow(
|
||||
checkpoint=CheckpointConfig(
|
||||
directory="./flow_cp",
|
||||
location="./flow_cp",
|
||||
on_events=["method_execution_finished"],
|
||||
),
|
||||
)
|
||||
@@ -137,7 +137,7 @@ agent = Agent(
|
||||
goal="Research topics",
|
||||
backstory="Expert researcher",
|
||||
checkpoint=CheckpointConfig(
|
||||
directory="./agent_cp",
|
||||
location="./agent_cp",
|
||||
on_events=["lite_agent_execution_completed"],
|
||||
),
|
||||
)
|
||||
@@ -160,14 +160,14 @@ crew = Crew(
|
||||
agents=[...],
|
||||
tasks=[...],
|
||||
checkpoint=CheckpointConfig(
|
||||
directory="./my_checkpoints",
|
||||
location="./my_checkpoints",
|
||||
provider=JsonProvider(), # this is the default
|
||||
max_checkpoints=5, # prunes oldest files
|
||||
),
|
||||
)
|
||||
```
|
||||
|
||||
Files are named `<timestamp>_<uuid>.json` inside the directory.
|
||||
Files are named `<timestamp>_<uuid>.json` inside the location directory.
|
||||
|
||||
### SqliteProvider
|
||||
|
||||
@@ -181,17 +181,14 @@ crew = Crew(
|
||||
agents=[...],
|
||||
tasks=[...],
|
||||
checkpoint=CheckpointConfig(
|
||||
directory="./.checkpoints.db",
|
||||
provider=SqliteProvider(max_checkpoints=50),
|
||||
location="./.checkpoints.db",
|
||||
provider=SqliteProvider(),
|
||||
max_checkpoints=50,
|
||||
),
|
||||
)
|
||||
```
|
||||
|
||||
`SqliteProvider` accepts its own `max_checkpoints` parameter that prunes old rows via SQL. WAL journal mode is enabled for concurrent read access.
|
||||
|
||||
<Note>
|
||||
When using `SqliteProvider`, the `directory` field is the database file path, not a directory. The `max_checkpoints` on `CheckpointConfig` controls filesystem pruning (for `JsonProvider`), while `SqliteProvider.max_checkpoints` controls row pruning in the database.
|
||||
</Note>
|
||||
WAL journal mode is enabled for concurrent read access.
|
||||
|
||||
## Event Types
|
||||
|
||||
|
||||
@@ -7,6 +7,10 @@ mode: "wide"
|
||||
|
||||
# `CodeInterpreterTool`
|
||||
|
||||
<Warning>
|
||||
**Deprecated:** `CodeInterpreterTool` has been removed from `crewai-tools`. The `allow_code_execution` and `code_execution_mode` parameters on `Agent` are also deprecated. Use a dedicated sandbox service — [E2B](https://e2b.dev) or [Modal](https://modal.com) — for secure, isolated code execution.
|
||||
</Warning>
|
||||
|
||||
## Description
|
||||
|
||||
The `CodeInterpreterTool` enables CrewAI agents to execute Python 3 code that they generate autonomously. This functionality is particularly valuable as it allows agents to create code, execute it, obtain the results, and utilize that information to inform subsequent decisions and actions.
|
||||
|
||||
@@ -75,4 +75,20 @@ tool = CSVSearchTool(
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
## Security
|
||||
|
||||
### Path Validation
|
||||
|
||||
File paths provided to this tool are validated against the current working directory. Paths that resolve outside the working directory are rejected with a `ValueError`.
|
||||
|
||||
To allow paths outside the working directory (for example, in tests or trusted pipelines), set the environment variable:
|
||||
|
||||
```shell
|
||||
CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true
|
||||
```
|
||||
|
||||
### URL Validation
|
||||
|
||||
URL inputs are validated: `file://` URIs and requests targeting private or reserved IP ranges are blocked to prevent server-side request forgery (SSRF) attacks.
|
||||
```
|
||||
@@ -67,4 +67,16 @@ tool = DirectorySearchTool(
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
## Security
|
||||
|
||||
### Path Validation
|
||||
|
||||
Directory paths provided to this tool are validated against the current working directory. Paths that resolve outside the working directory are rejected with a `ValueError`.
|
||||
|
||||
To allow paths outside the working directory (for example, in tests or trusted pipelines), set the environment variable:
|
||||
|
||||
```shell
|
||||
CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true
|
||||
```
|
||||
```
|
||||
@@ -74,3 +74,19 @@ tool = JSONSearchTool(
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
## Security
|
||||
|
||||
### Path Validation
|
||||
|
||||
File paths provided to this tool are validated against the current working directory. Paths that resolve outside the working directory are rejected with a `ValueError`.
|
||||
|
||||
To allow paths outside the working directory (for example, in tests or trusted pipelines), set the environment variable:
|
||||
|
||||
```shell
|
||||
CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true
|
||||
```
|
||||
|
||||
### URL Validation
|
||||
|
||||
URL inputs are validated: `file://` URIs and requests targeting private or reserved IP ranges are blocked to prevent server-side request forgery (SSRF) attacks.
|
||||
|
||||
@@ -105,4 +105,20 @@ tool = PDFSearchTool(
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
## Security
|
||||
|
||||
### Path Validation
|
||||
|
||||
File paths provided to this tool are validated against the current working directory. Paths that resolve outside the working directory are rejected with a `ValueError`.
|
||||
|
||||
To allow paths outside the working directory (for example, in tests or trusted pipelines), set the environment variable:
|
||||
|
||||
```shell
|
||||
CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true
|
||||
```
|
||||
|
||||
### URL Validation
|
||||
|
||||
URL inputs are validated: `file://` URIs and requests targeting private or reserved IP ranges are blocked to prevent server-side request forgery (SSRF) attacks.
|
||||
```
|
||||
@@ -4,6 +4,77 @@ description: "CrewAI의 제품 업데이트, 개선 사항 및 버그 수정"
|
||||
icon: "clock"
|
||||
mode: "wide"
|
||||
---
|
||||
<Update label="2026년 4월 7일">
|
||||
## v1.14.0
|
||||
|
||||
[GitHub 릴리스 보기](https://github.com/crewAIInc/crewAI/releases/tag/1.14.0)
|
||||
|
||||
## 변경 사항
|
||||
|
||||
### 기능
|
||||
- 체크포인트 목록/정보 CLI 명령 추가
|
||||
- 추적을 구분하기 위한 guardrail_type 및 이름 추가
|
||||
- 체크포인트 저장을 위한 SqliteProvider 추가
|
||||
- 자동 체크포인트 생성을 위한 CheckpointConfig 추가
|
||||
- 런타임 상태 체크포인트, 이벤트 시스템 및 실행기 리팩토링 구현
|
||||
|
||||
### 버그 수정
|
||||
- SSRF 및 경로 탐색 보호 추가
|
||||
- RAG 도구에 경로 및 URL 유효성 검사 추가
|
||||
- 토큰 절약을 위해 메모리 직렬화에서 임베딩 벡터 제외
|
||||
- 흐름 템플릿에 쓰기 전에 출력 디렉토리가 존재하는지 확인
|
||||
- CVE-2026-35030 문제를 해결하기 위해 litellm을 >=1.83.0으로 업데이트
|
||||
- 아랍어 페이지 렌더링 오류를 유발하는 SEO 인덱싱 필드 제거
|
||||
|
||||
### 문서
|
||||
- v1.14.0에 대한 변경 로그 및 버전 업데이트
|
||||
- 명확성을 개선하기 위해 빠른 시작 및 설치 가이드 업데이트
|
||||
- 저장소 제공자 섹션 추가, JsonProvider 내보내기
|
||||
- AMP 교육 탭 가이드 추가
|
||||
|
||||
### 리팩토링
|
||||
- 체크포인트 API 정리
|
||||
- CodeInterpreterTool 제거 및 코드 실행 매개변수 사용 중단
|
||||
|
||||
## 기여자
|
||||
|
||||
@alex-clawd, @github-actions[bot], @greysonlalonde, @iris-clawd, @joaomdmoura, @lorenzejay, @lucasgomide
|
||||
|
||||
</Update>
|
||||
|
||||
<Update label="2026년 4월 7일">
|
||||
## v1.14.0a4
|
||||
|
||||
[GitHub 릴리스 보기](https://github.com/crewAIInc/crewAI/releases/tag/1.14.0a4)
|
||||
|
||||
## 변경 사항
|
||||
|
||||
### 기능
|
||||
- 추적을 구분하기 위해 guardrail_type 및 이름 추가
|
||||
- 체크포인트 저장을 위한 SqliteProvider 추가
|
||||
- 자동 체크포인트 생성을 위한 CheckpointConfig 추가
|
||||
- 런타임 상태 체크포인트, 이벤트 시스템 및 실행기 리팩토링 구현
|
||||
|
||||
### 버그 수정
|
||||
- 토큰 절약을 위해 메모리 직렬화에서 임베딩 벡터 제외
|
||||
- CVE-2026-35030 문제를 해결하기 위해 litellm을 >=1.83.0으로 업데이트
|
||||
|
||||
### 문서
|
||||
- 명확성을 개선하기 위해 빠른 시작 및 설치 가이드 업데이트
|
||||
- 저장소 제공자 섹션 추가 및 JsonProvider 내보내기
|
||||
|
||||
### 성능
|
||||
- 체크포인트 데이터 열에 JSONB 사용
|
||||
|
||||
### 리팩토링
|
||||
- CodeInterpreterTool 제거 및 코드 실행 매개변수 사용 중단
|
||||
|
||||
## 기여자
|
||||
|
||||
@alex-clawd, @github-actions[bot], @greysonlalonde, @joaomdmoura, @lorenzejay, @lucasgomide
|
||||
|
||||
</Update>
|
||||
|
||||
<Update label="2026년 4월 6일">
|
||||
## v1.14.0a3
|
||||
|
||||
|
||||
@@ -291,15 +291,13 @@ multimodal_agent = Agent(
|
||||
- `max_retry_limit`: 오류 발생 시 재시도 횟수
|
||||
|
||||
#### 코드 실행
|
||||
- `allow_code_execution`: 코드를 실행하려면 True여야 합니다
|
||||
- `code_execution_mode`:
|
||||
- `"safe"`: Docker를 사용합니다 (프로덕션에 권장)
|
||||
- `"unsafe"`: 직접 실행 (신뢰할 수 있는 환경에서만 사용)
|
||||
|
||||
<Note>
|
||||
이 옵션은 기본 Docker 이미지를 실행합니다. Docker 이미지를 구성하려면 도구 섹션에 있는 Code Interpreter Tool을 확인하십시오.
|
||||
Code Interpreter Tool을 에이전트의 도구 파라미터로 추가하십시오.
|
||||
</Note>
|
||||
<Warning>
|
||||
`allow_code_execution` 및 `code_execution_mode`는 더 이상 사용되지 않습니다. `CodeInterpreterTool`이 `crewai-tools`에서 제거되었습니다. 안전한 코드 실행을 위해 [E2B](https://e2b.dev) 또는 [Modal](https://modal.com)과 같은 전용 샌드박스 서비스를 사용하세요.
|
||||
</Warning>
|
||||
|
||||
- `allow_code_execution` _(지원 중단)_: 이전에 `CodeInterpreterTool`을 통한 내장 코드 실행을 활성화했습니다.
|
||||
- `code_execution_mode` _(지원 중단)_: 이전에 실행 모드를 제어했습니다 (Docker의 경우 `"safe"`, 직접 실행의 경우 `"unsafe"`).
|
||||
|
||||
#### 고급 기능
|
||||
- `multimodal`: 텍스트와 시각적 콘텐츠 처리를 위한 멀티모달 기능 활성화
|
||||
@@ -627,9 +625,10 @@ asyncio.run(main())
|
||||
## 중요한 고려사항 및 모범 사례
|
||||
|
||||
### 보안 및 코드 실행
|
||||
- `allow_code_execution`을 사용할 때는 사용자 입력에 주의하고 항상 입력 값을 검증하세요
|
||||
- 운영 환경에서는 `code_execution_mode: "safe"`(Docker)를 사용하세요
|
||||
- 무한 루프를 방지하기 위해 적절한 `max_execution_time` 제한을 설정하는 것을 고려하세요
|
||||
|
||||
<Warning>
|
||||
`allow_code_execution` 및 `code_execution_mode`는 더 이상 사용되지 않으며 `CodeInterpreterTool`이 제거되었습니다. 안전한 코드 실행을 위해 [E2B](https://e2b.dev) 또는 [Modal](https://modal.com)과 같은 전용 샌드박스 서비스를 사용하세요.
|
||||
</Warning>
|
||||
|
||||
### 성능 최적화
|
||||
- `respect_context_window: true`를 사용하여 토큰 제한 문제를 방지하세요.
|
||||
|
||||
@@ -39,7 +39,7 @@ crew = Crew(
|
||||
agents=[...],
|
||||
tasks=[...],
|
||||
checkpoint=CheckpointConfig(
|
||||
directory="./my_checkpoints",
|
||||
location="./my_checkpoints",
|
||||
on_events=["task_completed", "crew_kickoff_completed"],
|
||||
max_checkpoints=5,
|
||||
),
|
||||
@@ -50,7 +50,7 @@ crew = Crew(
|
||||
|
||||
| 필드 | 타입 | 기본값 | 설명 |
|
||||
|:-----|:-----|:-------|:-----|
|
||||
| `directory` | `str` | `"./.checkpoints"` | 체크포인트 파일 경로 |
|
||||
| `location` | `str` | `"./.checkpoints"` | 체크포인트 파일 경로 |
|
||||
| `on_events` | `list[str]` | `["task_completed"]` | 체크포인트를 트리거하는 이벤트 타입 |
|
||||
| `provider` | `BaseProvider` | `JsonProvider()` | 스토리지 백엔드 |
|
||||
| `max_checkpoints` | `int \| None` | `None` | 보관할 최대 파일 수; 오래된 것부터 삭제 |
|
||||
@@ -95,7 +95,7 @@ result = crew.kickoff() # 마지막으로 완료된 태스크부터 재개
|
||||
crew = Crew(
|
||||
agents=[researcher, writer],
|
||||
tasks=[research_task, write_task, review_task],
|
||||
checkpoint=CheckpointConfig(directory="./crew_cp"),
|
||||
checkpoint=CheckpointConfig(location="./crew_cp"),
|
||||
)
|
||||
```
|
||||
|
||||
@@ -118,7 +118,7 @@ class MyFlow(Flow):
|
||||
|
||||
flow = MyFlow(
|
||||
checkpoint=CheckpointConfig(
|
||||
directory="./flow_cp",
|
||||
location="./flow_cp",
|
||||
on_events=["method_execution_finished"],
|
||||
),
|
||||
)
|
||||
@@ -137,7 +137,7 @@ agent = Agent(
|
||||
goal="Research topics",
|
||||
backstory="Expert researcher",
|
||||
checkpoint=CheckpointConfig(
|
||||
directory="./agent_cp",
|
||||
location="./agent_cp",
|
||||
on_events=["lite_agent_execution_completed"],
|
||||
),
|
||||
)
|
||||
@@ -160,7 +160,7 @@ crew = Crew(
|
||||
agents=[...],
|
||||
tasks=[...],
|
||||
checkpoint=CheckpointConfig(
|
||||
directory="./my_checkpoints",
|
||||
location="./my_checkpoints",
|
||||
provider=JsonProvider(),
|
||||
max_checkpoints=5,
|
||||
),
|
||||
@@ -179,15 +179,12 @@ crew = Crew(
|
||||
agents=[...],
|
||||
tasks=[...],
|
||||
checkpoint=CheckpointConfig(
|
||||
directory="./.checkpoints.db",
|
||||
provider=SqliteProvider(max_checkpoints=50),
|
||||
location="./.checkpoints.db",
|
||||
provider=SqliteProvider(),
|
||||
),
|
||||
)
|
||||
```
|
||||
|
||||
<Note>
|
||||
`SqliteProvider`를 사용할 때 `directory` 필드는 디렉토리가 아닌 데이터베이스 파일 경로입니다.
|
||||
</Note>
|
||||
|
||||
## 이벤트 타입
|
||||
|
||||
|
||||
@@ -7,6 +7,10 @@ mode: "wide"
|
||||
|
||||
# `CodeInterpreterTool`
|
||||
|
||||
<Warning>
|
||||
**지원 중단:** `CodeInterpreterTool`이 `crewai-tools`에서 제거되었습니다. `Agent`의 `allow_code_execution` 및 `code_execution_mode` 파라미터도 더 이상 사용되지 않습니다. 안전하고 격리된 코드 실행을 위해 전용 샌드박스 서비스 — [E2B](https://e2b.dev) 또는 [Modal](https://modal.com) — 을 사용하세요.
|
||||
</Warning>
|
||||
|
||||
## 설명
|
||||
|
||||
`CodeInterpreterTool`은 CrewAI 에이전트가 자율적으로 생성한 Python 3 코드를 실행할 수 있도록 합니다. 이 기능은 에이전트가 코드를 생성하고, 실행하며, 결과를 얻고, 그 정보를 활용하여 이후의 결정과 행동에 반영할 수 있다는 점에서 특히 유용합니다.
|
||||
|
||||
@@ -76,3 +76,19 @@ tool = CSVSearchTool(
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
## 보안
|
||||
|
||||
### 경로 유효성 검사
|
||||
|
||||
이 도구에 제공되는 파일 경로는 현재 작업 디렉터리에 대해 검증됩니다. 작업 디렉터리 외부로 확인되는 경로는 `ValueError`로 거부됩니다.
|
||||
|
||||
작업 디렉터리 외부의 경로를 허용하려면 (예: 테스트 또는 신뢰할 수 있는 파이프라인), 다음 환경 변수를 설정하세요:
|
||||
|
||||
```shell
|
||||
CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true
|
||||
```
|
||||
|
||||
### URL 유효성 검사
|
||||
|
||||
URL 입력도 검증됩니다: `file://` URI와 사설 또는 예약된 IP 범위를 대상으로 하는 요청은 서버 측 요청 위조(SSRF) 공격을 방지하기 위해 차단됩니다.
|
||||
|
||||
@@ -68,3 +68,15 @@ tool = DirectorySearchTool(
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
## 보안
|
||||
|
||||
### 경로 유효성 검사
|
||||
|
||||
이 도구에 제공되는 디렉터리 경로는 현재 작업 디렉터리에 대해 검증됩니다. 작업 디렉터리 외부로 확인되는 경로는 `ValueError`로 거부됩니다.
|
||||
|
||||
작업 디렉터리 외부의 경로를 허용하려면 (예: 테스트 또는 신뢰할 수 있는 파이프라인), 다음 환경 변수를 설정하세요:
|
||||
|
||||
```shell
|
||||
CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true
|
||||
```
|
||||
|
||||
@@ -71,3 +71,19 @@ tool = JSONSearchTool(
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
## 보안
|
||||
|
||||
### 경로 유효성 검사
|
||||
|
||||
이 도구에 제공되는 파일 경로는 현재 작업 디렉터리에 대해 검증됩니다. 작업 디렉터리 외부로 확인되는 경로는 `ValueError`로 거부됩니다.
|
||||
|
||||
작업 디렉터리 외부의 경로를 허용하려면 (예: 테스트 또는 신뢰할 수 있는 파이프라인), 다음 환경 변수를 설정하세요:
|
||||
|
||||
```shell
|
||||
CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true
|
||||
```
|
||||
|
||||
### URL 유효성 검사
|
||||
|
||||
URL 입력도 검증됩니다: `file://` URI와 사설 또는 예약된 IP 범위를 대상으로 하는 요청은 서버 측 요청 위조(SSRF) 공격을 방지하기 위해 차단됩니다.
|
||||
|
||||
@@ -102,3 +102,19 @@ tool = PDFSearchTool(
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
## 보안
|
||||
|
||||
### 경로 유효성 검사
|
||||
|
||||
이 도구에 제공되는 파일 경로는 현재 작업 디렉터리에 대해 검증됩니다. 작업 디렉터리 외부로 확인되는 경로는 `ValueError`로 거부됩니다.
|
||||
|
||||
작업 디렉터리 외부의 경로를 허용하려면 (예: 테스트 또는 신뢰할 수 있는 파이프라인), 다음 환경 변수를 설정하세요:
|
||||
|
||||
```shell
|
||||
CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true
|
||||
```
|
||||
|
||||
### URL 유효성 검사
|
||||
|
||||
URL 입력도 검증됩니다: `file://` URI와 사설 또는 예약된 IP 범위를 대상으로 하는 요청은 서버 측 요청 위조(SSRF) 공격을 방지하기 위해 차단됩니다.
|
||||
|
||||
@@ -4,6 +4,77 @@ description: "Atualizações de produto, melhorias e correções do CrewAI"
|
||||
icon: "clock"
|
||||
mode: "wide"
|
||||
---
|
||||
<Update label="07 abr 2026">
|
||||
## v1.14.0
|
||||
|
||||
[Ver release no GitHub](https://github.com/crewAIInc/crewAI/releases/tag/1.14.0)
|
||||
|
||||
## O que Mudou
|
||||
|
||||
### Recursos
|
||||
- Adicionar comandos CLI de lista/informações de checkpoint
|
||||
- Adicionar guardrail_type e nome para distinguir rastros
|
||||
- Adicionar SqliteProvider para armazenamento de checkpoints
|
||||
- Adicionar CheckpointConfig para checkpointing automático
|
||||
- Implementar checkpointing de estado em tempo de execução, sistema de eventos e refatoração do executor
|
||||
|
||||
### Correções de Bugs
|
||||
- Adicionar proteções contra SSRF e travessia de caminho
|
||||
- Adicionar validação de caminho e URL às ferramentas RAG
|
||||
- Excluir vetores de incorporação da serialização de memória para economizar tokens
|
||||
- Garantir que o diretório de saída exista antes de escrever no modelo de fluxo
|
||||
- Atualizar litellm para >=1.83.0 para resolver CVE-2026-35030
|
||||
- Remover campo de indexação SEO que causava renderização incorreta da página em árabe
|
||||
|
||||
### Documentação
|
||||
- Atualizar changelog e versão para v1.14.0
|
||||
- Atualizar guias de início rápido e instalação para maior clareza
|
||||
- Adicionar seção de provedores de armazenamento, exportar JsonProvider
|
||||
- Adicionar guia da aba de Treinamento AMP
|
||||
|
||||
### Refatoração
|
||||
- Limpar API de checkpoint
|
||||
- Remover CodeInterpreterTool e descontinuar parâmetros de execução de código
|
||||
|
||||
## Contribuidores
|
||||
|
||||
@alex-clawd, @github-actions[bot], @greysonlalonde, @iris-clawd, @joaomdmoura, @lorenzejay, @lucasgomide
|
||||
|
||||
</Update>
|
||||
|
||||
<Update label="07 abr 2026">
|
||||
## v1.14.0a4
|
||||
|
||||
[Ver release no GitHub](https://github.com/crewAIInc/crewAI/releases/tag/1.14.0a4)
|
||||
|
||||
## O que Mudou
|
||||
|
||||
### Recursos
|
||||
- Adicionar guardrail_type e nome para distinguir rastros
|
||||
- Adicionar SqliteProvider para armazenamento de checkpoints
|
||||
- Adicionar CheckpointConfig para checkpointing automático
|
||||
- Implementar checkpointing de estado em tempo de execução, sistema de eventos e refatoração do executor
|
||||
|
||||
### Correções de Bugs
|
||||
- Excluir vetores de incorporação da serialização de memória para economizar tokens
|
||||
- Atualizar litellm para >=1.83.0 para resolver CVE-2026-35030
|
||||
|
||||
### Documentação
|
||||
- Atualizar guias de início rápido e instalação para melhor clareza
|
||||
- Adicionar seção de provedores de armazenamento e exportar JsonProvider
|
||||
|
||||
### Desempenho
|
||||
- Usar JSONB para a coluna de dados de checkpoint
|
||||
|
||||
### Refatoração
|
||||
- Remover CodeInterpreterTool e descontinuar parâmetros de execução de código
|
||||
|
||||
## Contribuidores
|
||||
|
||||
@alex-clawd, @github-actions[bot], @greysonlalonde, @joaomdmoura, @lorenzejay, @lucasgomide
|
||||
|
||||
</Update>
|
||||
|
||||
<Update label="06 abr 2026">
|
||||
## v1.14.0a3
|
||||
|
||||
|
||||
@@ -304,17 +304,12 @@ multimodal_agent = Agent(
|
||||
|
||||
#### Execução de Código
|
||||
|
||||
- `allow_code_execution`: Deve ser True para permitir execução de código
|
||||
- `code_execution_mode`:
|
||||
- `"safe"`: Usa Docker (recomendado para produção)
|
||||
- `"unsafe"`: Execução direta (apenas em ambientes confiáveis)
|
||||
<Warning>
|
||||
`allow_code_execution` e `code_execution_mode` estão depreciados. O `CodeInterpreterTool` foi removido do `crewai-tools`. Use um serviço de sandbox dedicado como [E2B](https://e2b.dev) ou [Modal](https://modal.com) para execução segura de código.
|
||||
</Warning>
|
||||
|
||||
<Note>
|
||||
Isso executa uma imagem Docker padrão. Se você deseja configurar a imagem
|
||||
Docker, veja a ferramenta Code Interpreter na seção de ferramentas. Adicione a
|
||||
ferramenta de interpretação de código como um parâmetro em ferramentas no
|
||||
agente.
|
||||
</Note>
|
||||
- `allow_code_execution` _(depreciado)_: Anteriormente habilitava a execução de código embutida via `CodeInterpreterTool`.
|
||||
- `code_execution_mode` _(depreciado)_: Anteriormente controlava o modo de execução (`"safe"` para Docker, `"unsafe"` para execução direta).
|
||||
|
||||
#### Funcionalidades Avançadas
|
||||
|
||||
@@ -565,9 +560,9 @@ agent = Agent(
|
||||
|
||||
### Segurança e Execução de Código
|
||||
|
||||
- Ao usar `allow_code_execution`, seja cauteloso com entradas do usuário e sempre as valide
|
||||
- Use `code_execution_mode: "safe"` (Docker) em ambientes de produção
|
||||
- Considere definir limites adequados de `max_execution_time` para evitar loops infinitos
|
||||
<Warning>
|
||||
`allow_code_execution` e `code_execution_mode` estão depreciados e o `CodeInterpreterTool` foi removido. Use um serviço de sandbox dedicado como [E2B](https://e2b.dev) ou [Modal](https://modal.com) para execução segura de código.
|
||||
</Warning>
|
||||
|
||||
### Otimização de Performance
|
||||
|
||||
|
||||
@@ -39,7 +39,7 @@ crew = Crew(
|
||||
agents=[...],
|
||||
tasks=[...],
|
||||
checkpoint=CheckpointConfig(
|
||||
directory="./my_checkpoints",
|
||||
location="./my_checkpoints",
|
||||
on_events=["task_completed", "crew_kickoff_completed"],
|
||||
max_checkpoints=5,
|
||||
),
|
||||
@@ -50,7 +50,7 @@ crew = Crew(
|
||||
|
||||
| Campo | Tipo | Padrao | Descricao |
|
||||
|:------|:-----|:-------|:----------|
|
||||
| `directory` | `str` | `"./.checkpoints"` | Caminho para os arquivos de checkpoint |
|
||||
| `location` | `str` | `"./.checkpoints"` | Caminho para os arquivos de checkpoint |
|
||||
| `on_events` | `list[str]` | `["task_completed"]` | Tipos de evento que acionam um checkpoint |
|
||||
| `provider` | `BaseProvider` | `JsonProvider()` | Backend de armazenamento |
|
||||
| `max_checkpoints` | `int \| None` | `None` | Maximo de arquivos a manter; os mais antigos sao removidos primeiro |
|
||||
@@ -95,7 +95,7 @@ A crew restaurada pula tarefas ja concluidas e retoma a partir da primeira incom
|
||||
crew = Crew(
|
||||
agents=[researcher, writer],
|
||||
tasks=[research_task, write_task, review_task],
|
||||
checkpoint=CheckpointConfig(directory="./crew_cp"),
|
||||
checkpoint=CheckpointConfig(location="./crew_cp"),
|
||||
)
|
||||
```
|
||||
|
||||
@@ -118,7 +118,7 @@ class MyFlow(Flow):
|
||||
|
||||
flow = MyFlow(
|
||||
checkpoint=CheckpointConfig(
|
||||
directory="./flow_cp",
|
||||
location="./flow_cp",
|
||||
on_events=["method_execution_finished"],
|
||||
),
|
||||
)
|
||||
@@ -137,7 +137,7 @@ agent = Agent(
|
||||
goal="Research topics",
|
||||
backstory="Expert researcher",
|
||||
checkpoint=CheckpointConfig(
|
||||
directory="./agent_cp",
|
||||
location="./agent_cp",
|
||||
on_events=["lite_agent_execution_completed"],
|
||||
),
|
||||
)
|
||||
@@ -160,7 +160,7 @@ crew = Crew(
|
||||
agents=[...],
|
||||
tasks=[...],
|
||||
checkpoint=CheckpointConfig(
|
||||
directory="./my_checkpoints",
|
||||
location="./my_checkpoints",
|
||||
provider=JsonProvider(),
|
||||
max_checkpoints=5,
|
||||
),
|
||||
@@ -179,15 +179,12 @@ crew = Crew(
|
||||
agents=[...],
|
||||
tasks=[...],
|
||||
checkpoint=CheckpointConfig(
|
||||
directory="./.checkpoints.db",
|
||||
provider=SqliteProvider(max_checkpoints=50),
|
||||
location="./.checkpoints.db",
|
||||
provider=SqliteProvider(),
|
||||
),
|
||||
)
|
||||
```
|
||||
|
||||
<Note>
|
||||
Ao usar `SqliteProvider`, o campo `directory` e o caminho do arquivo de banco de dados, nao um diretorio.
|
||||
</Note>
|
||||
|
||||
## Tipos de Evento
|
||||
|
||||
|
||||
@@ -7,6 +7,10 @@ mode: "wide"
|
||||
|
||||
# `CodeInterpreterTool`
|
||||
|
||||
<Warning>
|
||||
**Depreciado:** O `CodeInterpreterTool` foi removido do `crewai-tools`. Os parâmetros `allow_code_execution` e `code_execution_mode` do `Agent` também estão depreciados. Use um serviço de sandbox dedicado — [E2B](https://e2b.dev) ou [Modal](https://modal.com) — para execução de código segura e isolada.
|
||||
</Warning>
|
||||
|
||||
## Descrição
|
||||
|
||||
O `CodeInterpreterTool` permite que agentes CrewAI executem códigos Python 3 gerados autonomamente. Essa funcionalidade é particularmente valiosa, pois permite que os agentes criem códigos, os executem, obtenham os resultados e usem essas informações para orientar decisões e ações subsequentes.
|
||||
|
||||
@@ -75,4 +75,20 @@ tool = CSVSearchTool(
|
||||
),
|
||||
)
|
||||
)
|
||||
|
||||
## Segurança
|
||||
|
||||
### Validação de Caminhos
|
||||
|
||||
Os caminhos de arquivo fornecidos a esta ferramenta são validados em relação ao diretório de trabalho atual. Caminhos que resolvem fora do diretório de trabalho são rejeitados com um `ValueError`.
|
||||
|
||||
Para permitir caminhos fora do diretório de trabalho (por exemplo, em testes ou pipelines confiáveis), defina a variável de ambiente:
|
||||
|
||||
```shell
|
||||
CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true
|
||||
```
|
||||
|
||||
### Validação de URLs
|
||||
|
||||
Entradas de URL também são validadas: URIs `file://` e requisições direcionadas a faixas de IP privadas ou reservadas são bloqueadas para prevenir ataques de falsificação de requisições do lado do servidor (SSRF).
|
||||
```
|
||||
@@ -67,4 +67,16 @@ tool = DirectorySearchTool(
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
|
||||
## Segurança
|
||||
|
||||
### Validação de Caminhos
|
||||
|
||||
Os caminhos de diretório fornecidos a esta ferramenta são validados em relação ao diretório de trabalho atual. Caminhos que resolvem fora do diretório de trabalho são rejeitados com um `ValueError`.
|
||||
|
||||
Para permitir caminhos fora do diretório de trabalho (por exemplo, em testes ou pipelines confiáveis), defina a variável de ambiente:
|
||||
|
||||
```shell
|
||||
CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true
|
||||
```
|
||||
@@ -73,4 +73,20 @@ tool = JSONSearchTool(
|
||||
},
|
||||
}
|
||||
)
|
||||
|
||||
## Segurança
|
||||
|
||||
### Validação de Caminhos
|
||||
|
||||
Os caminhos de arquivo fornecidos a esta ferramenta são validados em relação ao diretório de trabalho atual. Caminhos que resolvem fora do diretório de trabalho são rejeitados com um `ValueError`.
|
||||
|
||||
Para permitir caminhos fora do diretório de trabalho (por exemplo, em testes ou pipelines confiáveis), defina a variável de ambiente:
|
||||
|
||||
```shell
|
||||
CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true
|
||||
```
|
||||
|
||||
### Validação de URLs
|
||||
|
||||
Entradas de URL também são validadas: URIs `file://` e requisições direcionadas a faixas de IP privadas ou reservadas são bloqueadas para prevenir ataques de falsificação de requisições do lado do servidor (SSRF).
|
||||
```
|
||||
@@ -101,4 +101,20 @@ tool = PDFSearchTool(
|
||||
},
|
||||
}
|
||||
)
|
||||
```
|
||||
```
|
||||
|
||||
## Segurança
|
||||
|
||||
### Validação de Caminhos
|
||||
|
||||
Os caminhos de arquivo fornecidos a esta ferramenta são validados em relação ao diretório de trabalho atual. Caminhos que resolvem fora do diretório de trabalho são rejeitados com um `ValueError`.
|
||||
|
||||
Para permitir caminhos fora do diretório de trabalho (por exemplo, em testes ou pipelines confiáveis), defina a variável de ambiente:
|
||||
|
||||
```shell
|
||||
CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true
|
||||
```
|
||||
|
||||
### Validação de URLs
|
||||
|
||||
Entradas de URL também são validadas: URIs `file://` e requisições direcionadas a faixas de IP privadas ou reservadas são bloqueadas para prevenir ataques de falsificação de requisições do lado do servidor (SSRF).
|
||||
@@ -152,4 +152,4 @@ __all__ = [
|
||||
"wrap_file_source",
|
||||
]
|
||||
|
||||
__version__ = "1.14.0a3"
|
||||
__version__ = "1.14.0"
|
||||
|
||||
@@ -10,7 +10,7 @@ requires-python = ">=3.10, <3.14"
|
||||
dependencies = [
|
||||
"pytube~=15.0.0",
|
||||
"requests~=2.32.5",
|
||||
"crewai==1.14.0a3",
|
||||
"crewai==1.14.0",
|
||||
"tiktoken~=0.8.0",
|
||||
"beautifulsoup4~=4.13.4",
|
||||
"python-docx~=1.2.0",
|
||||
|
||||
@@ -305,4 +305,4 @@ __all__ = [
|
||||
"ZapierActionTools",
|
||||
]
|
||||
|
||||
__version__ = "1.14.0a3"
|
||||
__version__ = "1.14.0"
|
||||
|
||||
@@ -109,7 +109,7 @@ class DataTypes:
|
||||
if isinstance(content, str):
|
||||
try:
|
||||
url = urlparse(content)
|
||||
is_url = bool(url.scheme and url.netloc) or url.scheme == "file"
|
||||
is_url = bool(url.scheme in ("http", "https") and url.netloc)
|
||||
except Exception: # noqa: S110
|
||||
pass
|
||||
|
||||
|
||||
205
lib/crewai-tools/src/crewai_tools/security/safe_path.py
Normal file
205
lib/crewai-tools/src/crewai_tools/security/safe_path.py
Normal file
@@ -0,0 +1,205 @@
|
||||
"""Path and URL validation utilities for crewai-tools.
|
||||
|
||||
Provides validation for file paths and URLs to prevent unauthorized
|
||||
file access and server-side request forgery (SSRF) when tools accept
|
||||
user-controlled or LLM-controlled inputs at runtime.
|
||||
|
||||
Set CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true to bypass validation (not
|
||||
recommended for production).
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import ipaddress
|
||||
import logging
|
||||
import os
|
||||
import socket
|
||||
from urllib.parse import urlparse
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
_UNSAFE_PATHS_ENV = "CREWAI_TOOLS_ALLOW_UNSAFE_PATHS"
|
||||
|
||||
|
||||
def _is_escape_hatch_enabled() -> bool:
|
||||
"""Check if the unsafe paths escape hatch is enabled."""
|
||||
return os.environ.get(_UNSAFE_PATHS_ENV, "").lower() in ("true", "1", "yes")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# File path validation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
def validate_file_path(path: str, base_dir: str | None = None) -> str:
|
||||
"""Validate that a file path is safe to read.
|
||||
|
||||
Resolves symlinks and ``..`` components, then checks that the resolved
|
||||
path falls within *base_dir* (defaults to the current working directory).
|
||||
|
||||
Args:
|
||||
path: The file path to validate.
|
||||
base_dir: Allowed root directory. Defaults to ``os.getcwd()``.
|
||||
|
||||
Returns:
|
||||
The resolved, validated absolute path.
|
||||
|
||||
Raises:
|
||||
ValueError: If the path escapes the allowed directory.
|
||||
"""
|
||||
if _is_escape_hatch_enabled():
|
||||
logger.warning(
|
||||
"%s is enabled — skipping file path validation for: %s",
|
||||
_UNSAFE_PATHS_ENV,
|
||||
path,
|
||||
)
|
||||
return os.path.realpath(path)
|
||||
|
||||
if base_dir is None:
|
||||
base_dir = os.getcwd()
|
||||
|
||||
resolved_base = os.path.realpath(base_dir)
|
||||
resolved_path = os.path.realpath(
|
||||
os.path.join(resolved_base, path) if not os.path.isabs(path) else path
|
||||
)
|
||||
|
||||
# Ensure the resolved path is within the base directory.
|
||||
# When resolved_base already ends with a separator (e.g. the filesystem
|
||||
# root "/"), appending os.sep would double it ("//"), so use the base
|
||||
# as-is in that case.
|
||||
prefix = resolved_base if resolved_base.endswith(os.sep) else resolved_base + os.sep
|
||||
if not resolved_path.startswith(prefix) and resolved_path != resolved_base:
|
||||
raise ValueError(
|
||||
f"Path '{path}' resolves to '{resolved_path}' which is outside "
|
||||
f"the allowed directory '{resolved_base}'. "
|
||||
f"Set {_UNSAFE_PATHS_ENV}=true to bypass this check."
|
||||
)
|
||||
|
||||
return resolved_path
|
||||
|
||||
|
||||
def validate_directory_path(path: str, base_dir: str | None = None) -> str:
|
||||
"""Validate that a directory path is safe to read.
|
||||
|
||||
Same as :func:`validate_file_path` but also checks that the path
|
||||
is an existing directory.
|
||||
|
||||
Args:
|
||||
path: The directory path to validate.
|
||||
base_dir: Allowed root directory. Defaults to ``os.getcwd()``.
|
||||
|
||||
Returns:
|
||||
The resolved, validated absolute path.
|
||||
|
||||
Raises:
|
||||
ValueError: If the path escapes the allowed directory or is not a directory.
|
||||
"""
|
||||
validated = validate_file_path(path, base_dir)
|
||||
if not os.path.isdir(validated):
|
||||
raise ValueError(f"Path '{validated}' is not a directory.")
|
||||
return validated
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# URL validation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
# Private and reserved IP ranges that should not be accessed
|
||||
_BLOCKED_IPV4_NETWORKS = [
|
||||
ipaddress.ip_network("10.0.0.0/8"),
|
||||
ipaddress.ip_network("172.16.0.0/12"),
|
||||
ipaddress.ip_network("192.168.0.0/16"),
|
||||
ipaddress.ip_network("127.0.0.0/8"),
|
||||
ipaddress.ip_network("169.254.0.0/16"), # Link-local / cloud metadata
|
||||
ipaddress.ip_network("0.0.0.0/32"),
|
||||
]
|
||||
|
||||
_BLOCKED_IPV6_NETWORKS = [
|
||||
ipaddress.ip_network("::1/128"),
|
||||
ipaddress.ip_network("::/128"),
|
||||
ipaddress.ip_network("fc00::/7"), # Unique local addresses
|
||||
ipaddress.ip_network("fe80::/10"), # Link-local IPv6
|
||||
]
|
||||
|
||||
|
||||
def _is_private_or_reserved(ip_str: str) -> bool:
|
||||
"""Check if an IP address is private, reserved, or otherwise unsafe."""
|
||||
try:
|
||||
addr = ipaddress.ip_address(ip_str)
|
||||
# Unwrap IPv4-mapped IPv6 addresses (e.g., ::ffff:127.0.0.1) to IPv4
|
||||
# so they are only checked against IPv4 networks (avoids TypeError when
|
||||
# an IPv4Address is compared against an IPv6Network).
|
||||
if isinstance(addr, ipaddress.IPv6Address) and addr.ipv4_mapped:
|
||||
addr = addr.ipv4_mapped
|
||||
networks = (
|
||||
_BLOCKED_IPV4_NETWORKS
|
||||
if isinstance(addr, ipaddress.IPv4Address)
|
||||
else _BLOCKED_IPV6_NETWORKS
|
||||
)
|
||||
return any(addr in network for network in networks)
|
||||
except ValueError:
|
||||
return True # If we can't parse, block it
|
||||
|
||||
|
||||
def validate_url(url: str) -> str:
|
||||
"""Validate that a URL is safe to fetch.
|
||||
|
||||
Blocks ``file://`` scheme entirely. For ``http``/``https``, resolves
|
||||
DNS and checks that the target IP is not private or reserved (prevents
|
||||
SSRF to internal services and cloud metadata endpoints).
|
||||
|
||||
Args:
|
||||
url: The URL to validate.
|
||||
|
||||
Returns:
|
||||
The validated URL string.
|
||||
|
||||
Raises:
|
||||
ValueError: If the URL uses a blocked scheme or resolves to a
|
||||
private/reserved IP address.
|
||||
"""
|
||||
if _is_escape_hatch_enabled():
|
||||
logger.warning(
|
||||
"%s is enabled — skipping URL validation for: %s",
|
||||
_UNSAFE_PATHS_ENV,
|
||||
url,
|
||||
)
|
||||
return url
|
||||
|
||||
parsed = urlparse(url)
|
||||
|
||||
# Block file:// scheme
|
||||
if parsed.scheme == "file":
|
||||
raise ValueError(
|
||||
f"file:// URLs are not allowed: '{url}'. "
|
||||
f"Use a file path instead, or set {_UNSAFE_PATHS_ENV}=true to bypass."
|
||||
)
|
||||
|
||||
# Only allow http and https
|
||||
if parsed.scheme not in ("http", "https"):
|
||||
raise ValueError(
|
||||
f"URL scheme '{parsed.scheme}' is not allowed. Only http and https are supported."
|
||||
)
|
||||
|
||||
if not parsed.hostname:
|
||||
raise ValueError(f"URL has no hostname: '{url}'")
|
||||
|
||||
# Resolve DNS and check IPs
|
||||
try:
|
||||
addrinfos = socket.getaddrinfo(
|
||||
parsed.hostname, parsed.port or (443 if parsed.scheme == "https" else 80)
|
||||
)
|
||||
except socket.gaierror as exc:
|
||||
raise ValueError(f"Could not resolve hostname: '{parsed.hostname}'") from exc
|
||||
|
||||
for _family, _, _, _, sockaddr in addrinfos:
|
||||
ip_str = str(sockaddr[0])
|
||||
if _is_private_or_reserved(ip_str):
|
||||
raise ValueError(
|
||||
f"URL '{url}' resolves to private/reserved IP {ip_str}. "
|
||||
f"Access to internal networks is not allowed. "
|
||||
f"Set {_UNSAFE_PATHS_ENV}=true to bypass."
|
||||
)
|
||||
|
||||
return url
|
||||
@@ -7,6 +7,8 @@ from crewai.tools import BaseTool, EnvVar
|
||||
from pydantic import BaseModel, Field
|
||||
import requests
|
||||
|
||||
from crewai_tools.security.safe_path import validate_url
|
||||
|
||||
|
||||
class BrightDataConfig(BaseModel):
|
||||
API_URL: str = "https://api.brightdata.com/request"
|
||||
@@ -134,6 +136,7 @@ class BrightDataWebUnlockerTool(BaseTool):
|
||||
"Content-Type": "application/json",
|
||||
}
|
||||
|
||||
validate_url(url)
|
||||
try:
|
||||
response = requests.post(
|
||||
self.base_url, json=payload, headers=headers, timeout=30
|
||||
|
||||
@@ -3,6 +3,8 @@ from typing import Any
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.security.safe_path import validate_file_path
|
||||
|
||||
|
||||
class ContextualAICreateAgentSchema(BaseModel):
|
||||
"""Schema for contextual create agent tool."""
|
||||
@@ -47,6 +49,7 @@ class ContextualAICreateAgentTool(BaseTool):
|
||||
document_paths: list[str],
|
||||
) -> str:
|
||||
"""Create a complete RAG pipeline with documents."""
|
||||
resolved_paths = [validate_file_path(doc_path) for doc_path in document_paths]
|
||||
try:
|
||||
import os
|
||||
|
||||
@@ -56,7 +59,7 @@ class ContextualAICreateAgentTool(BaseTool):
|
||||
|
||||
# Upload documents
|
||||
document_ids = []
|
||||
for doc_path in document_paths:
|
||||
for doc_path in resolved_paths:
|
||||
if not os.path.exists(doc_path):
|
||||
raise FileNotFoundError(f"Document not found: {doc_path}")
|
||||
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.security.safe_path import validate_file_path
|
||||
|
||||
|
||||
class ContextualAIParseSchema(BaseModel):
|
||||
"""Schema for contextual parse tool."""
|
||||
@@ -45,6 +47,7 @@ class ContextualAIParseTool(BaseTool):
|
||||
"""Parse a document using Contextual AI's parser."""
|
||||
if output_types is None:
|
||||
output_types = ["markdown-per-page"]
|
||||
file_path = validate_file_path(file_path)
|
||||
try:
|
||||
import json
|
||||
import os
|
||||
|
||||
@@ -4,6 +4,8 @@ from typing import Any
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.security.safe_path import validate_directory_path
|
||||
|
||||
|
||||
class FixedDirectoryReadToolSchema(BaseModel):
|
||||
"""Input for DirectoryReadTool."""
|
||||
@@ -39,6 +41,7 @@ class DirectoryReadTool(BaseTool):
|
||||
if directory is None:
|
||||
raise ValueError("Directory must be provided.")
|
||||
|
||||
directory = validate_directory_path(directory)
|
||||
if directory[-1] == "/":
|
||||
directory = directory[:-1]
|
||||
files_list = [
|
||||
|
||||
@@ -3,6 +3,7 @@ from typing import Any
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.rag.data_types import DataType
|
||||
from crewai_tools.security.safe_path import validate_directory_path
|
||||
from crewai_tools.tools.rag.rag_tool import RagTool
|
||||
|
||||
|
||||
@@ -37,6 +38,7 @@ class DirectorySearchTool(RagTool):
|
||||
self._generate_description()
|
||||
|
||||
def add(self, directory: str) -> None: # type: ignore[override]
|
||||
directory = validate_directory_path(directory)
|
||||
super().add(directory, data_type=DataType.DIRECTORY)
|
||||
|
||||
def _run( # type: ignore[override]
|
||||
|
||||
@@ -3,6 +3,8 @@ from typing import Any
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.security.safe_path import validate_file_path
|
||||
|
||||
|
||||
class FileReadToolSchema(BaseModel):
|
||||
"""Input for FileReadTool."""
|
||||
@@ -76,6 +78,7 @@ class FileReadTool(BaseTool):
|
||||
if file_path is None:
|
||||
return "Error: No file path provided. Please provide a file path either in the constructor or as an argument."
|
||||
|
||||
file_path = validate_file_path(file_path)
|
||||
try:
|
||||
with open(file_path, "r") as file:
|
||||
if start_line == 1 and line_count is None:
|
||||
|
||||
@@ -5,6 +5,8 @@ import zipfile
|
||||
from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.security.safe_path import validate_file_path
|
||||
|
||||
|
||||
class FileCompressorToolInput(BaseModel):
|
||||
"""Input schema for FileCompressorTool."""
|
||||
@@ -40,12 +42,15 @@ class FileCompressorTool(BaseTool):
|
||||
overwrite: bool = False,
|
||||
format: str = "zip",
|
||||
) -> str:
|
||||
input_path = validate_file_path(input_path)
|
||||
if not os.path.exists(input_path):
|
||||
return f"Input path '{input_path}' does not exist."
|
||||
|
||||
if not output_path:
|
||||
output_path = self._generate_output_path(input_path, format)
|
||||
|
||||
output_path = validate_file_path(output_path)
|
||||
|
||||
format_extension = {
|
||||
"zip": ".zip",
|
||||
"tar": ".tar",
|
||||
|
||||
@@ -5,6 +5,8 @@ from typing import Any
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
||||
|
||||
from crewai_tools.security.safe_path import validate_url
|
||||
|
||||
|
||||
try:
|
||||
from firecrawl import FirecrawlApp # type: ignore[import-untyped]
|
||||
@@ -106,6 +108,7 @@ class FirecrawlCrawlWebsiteTool(BaseTool):
|
||||
if not self._firecrawl:
|
||||
raise RuntimeError("FirecrawlApp not properly initialized")
|
||||
|
||||
url = validate_url(url)
|
||||
return self._firecrawl.crawl(url=url, poll_interval=2, **self.config)
|
||||
|
||||
|
||||
|
||||
@@ -5,6 +5,8 @@ from typing import Any
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
from pydantic import BaseModel, ConfigDict, Field, PrivateAttr
|
||||
|
||||
from crewai_tools.security.safe_path import validate_url
|
||||
|
||||
|
||||
try:
|
||||
from firecrawl import FirecrawlApp # type: ignore[import-untyped]
|
||||
@@ -106,6 +108,7 @@ class FirecrawlScrapeWebsiteTool(BaseTool):
|
||||
if not self._firecrawl:
|
||||
raise RuntimeError("FirecrawlApp not properly initialized")
|
||||
|
||||
url = validate_url(url)
|
||||
return self._firecrawl.scrape(url=url, **self.config)
|
||||
|
||||
|
||||
|
||||
@@ -4,6 +4,8 @@ from typing import Any, Literal
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.security.safe_path import validate_url
|
||||
|
||||
|
||||
class HyperbrowserLoadToolSchema(BaseModel):
|
||||
url: str = Field(description="Website URL")
|
||||
@@ -119,6 +121,7 @@ class HyperbrowserLoadTool(BaseTool):
|
||||
) from e
|
||||
|
||||
params = self._prepare_params(params)
|
||||
url = validate_url(url)
|
||||
|
||||
if operation == "scrape":
|
||||
scrape_params = StartScrapeJobParams(url=url, **params)
|
||||
|
||||
@@ -4,6 +4,8 @@ from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
import requests
|
||||
|
||||
from crewai_tools.security.safe_path import validate_url
|
||||
|
||||
|
||||
class JinaScrapeWebsiteToolInput(BaseModel):
|
||||
"""Input schema for JinaScrapeWebsiteTool."""
|
||||
@@ -45,6 +47,7 @@ class JinaScrapeWebsiteTool(BaseTool):
|
||||
"Website URL must be provided either during initialization or execution"
|
||||
)
|
||||
|
||||
url = validate_url(url)
|
||||
response = requests.get(
|
||||
f"https://r.jina.ai/{url}", headers=self.headers, timeout=15
|
||||
)
|
||||
|
||||
@@ -11,6 +11,8 @@ from crewai.tools.base_tool import BaseTool
|
||||
from crewai.utilities.types import LLMMessage
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.security.safe_path import validate_file_path
|
||||
|
||||
|
||||
class OCRToolSchema(BaseModel):
|
||||
"""Input schema for Optical Character Recognition Tool.
|
||||
@@ -98,5 +100,6 @@ class OCRTool(BaseTool):
|
||||
Returns:
|
||||
str: Base64-encoded image data as a UTF-8 string.
|
||||
"""
|
||||
image_path = validate_file_path(image_path)
|
||||
with open(image_path, "rb") as image_file:
|
||||
return base64.b64encode(image_file.read()).decode()
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
from abc import ABC, abstractmethod
|
||||
import os
|
||||
from typing import Any, Literal, cast
|
||||
|
||||
from crewai.rag.core.base_embeddings_callable import EmbeddingFunction
|
||||
@@ -246,7 +247,94 @@ class RagTool(BaseTool):
|
||||
# Auto-detect type from extension
|
||||
rag_tool.add("path/to/document.pdf") # auto-detects PDF
|
||||
"""
|
||||
self.adapter.add(*args, **kwargs)
|
||||
# Validate file paths and URLs before adding to prevent
|
||||
# unauthorized file reads and SSRF.
|
||||
from urllib.parse import urlparse
|
||||
|
||||
from crewai_tools.security.safe_path import validate_file_path, validate_url
|
||||
|
||||
def _check_url(value: str, label: str) -> None:
|
||||
try:
|
||||
validate_url(value)
|
||||
except ValueError as e:
|
||||
raise ValueError(f"Blocked unsafe {label}: {e}") from e
|
||||
|
||||
def _check_path(value: str, label: str) -> str:
|
||||
try:
|
||||
return validate_file_path(value)
|
||||
except ValueError as e:
|
||||
raise ValueError(f"Blocked unsafe {label}: {e}") from e
|
||||
|
||||
validated_args: list[ContentItem] = []
|
||||
for arg in args:
|
||||
source_ref = (
|
||||
str(arg.get("source", arg.get("content", "")))
|
||||
if isinstance(arg, dict)
|
||||
else str(arg)
|
||||
)
|
||||
|
||||
# Check if it's a URL — only catch urlparse-specific errors here;
|
||||
# validate_url's ValueError must propagate so it is never silently bypassed.
|
||||
try:
|
||||
parsed = urlparse(source_ref)
|
||||
except (ValueError, AttributeError):
|
||||
parsed = None
|
||||
|
||||
if parsed is not None and parsed.scheme in ("http", "https", "file"):
|
||||
try:
|
||||
validate_url(source_ref)
|
||||
except ValueError as e:
|
||||
raise ValueError(f"Blocked unsafe URL: {e}") from e
|
||||
validated_args.append(arg)
|
||||
continue
|
||||
|
||||
# Check if it looks like a file path (not a plain text string).
|
||||
# Check both os.sep (backslash on Windows) and "/" so that
|
||||
# forward-slash paths like "sub/file.txt" are caught on all platforms.
|
||||
if (
|
||||
os.path.sep in source_ref
|
||||
or "/" in source_ref
|
||||
or source_ref.startswith(".")
|
||||
or os.path.isabs(source_ref)
|
||||
):
|
||||
try:
|
||||
resolved_ref = validate_file_path(source_ref)
|
||||
except ValueError as e:
|
||||
raise ValueError(f"Blocked unsafe file path: {e}") from e
|
||||
# Use the resolved path to prevent symlink TOCTOU
|
||||
if isinstance(arg, dict):
|
||||
arg = {**arg}
|
||||
if "source" in arg:
|
||||
arg["source"] = resolved_ref
|
||||
elif "content" in arg:
|
||||
arg["content"] = resolved_ref
|
||||
else:
|
||||
arg = resolved_ref
|
||||
|
||||
validated_args.append(arg)
|
||||
|
||||
# Validate keyword path/URL arguments — these are equally user-controlled
|
||||
# and must not bypass the checks applied to positional args.
|
||||
if "path" in kwargs and kwargs.get("path") is not None:
|
||||
kwargs["path"] = _check_path(str(kwargs["path"]), "path")
|
||||
if "file_path" in kwargs and kwargs.get("file_path") is not None:
|
||||
kwargs["file_path"] = _check_path(str(kwargs["file_path"]), "file_path")
|
||||
|
||||
if "directory_path" in kwargs and kwargs.get("directory_path") is not None:
|
||||
kwargs["directory_path"] = _check_path(
|
||||
str(kwargs["directory_path"]), "directory_path"
|
||||
)
|
||||
|
||||
if "url" in kwargs and kwargs.get("url") is not None:
|
||||
_check_url(str(kwargs["url"]), "url")
|
||||
if "website" in kwargs and kwargs.get("website") is not None:
|
||||
_check_url(str(kwargs["website"]), "website")
|
||||
if "github_url" in kwargs and kwargs.get("github_url") is not None:
|
||||
_check_url(str(kwargs["github_url"]), "github_url")
|
||||
if "youtube_url" in kwargs and kwargs.get("youtube_url") is not None:
|
||||
_check_url(str(kwargs["youtube_url"]), "youtube_url")
|
||||
|
||||
self.adapter.add(*validated_args, **kwargs)
|
||||
|
||||
def _run(
|
||||
self,
|
||||
|
||||
@@ -5,6 +5,8 @@ from crewai.tools import BaseTool
|
||||
from pydantic import BaseModel, Field
|
||||
import requests
|
||||
|
||||
from crewai_tools.security.safe_path import validate_url
|
||||
|
||||
|
||||
try:
|
||||
from bs4 import BeautifulSoup
|
||||
@@ -81,6 +83,7 @@ class ScrapeElementFromWebsiteTool(BaseTool):
|
||||
if website_url is None or css_element is None:
|
||||
raise ValueError("Both website_url and css_element must be provided.")
|
||||
|
||||
website_url = validate_url(website_url)
|
||||
page = requests.get(
|
||||
website_url,
|
||||
headers=self.headers,
|
||||
|
||||
@@ -5,6 +5,8 @@ from typing import Any
|
||||
from pydantic import Field
|
||||
import requests
|
||||
|
||||
from crewai_tools.security.safe_path import validate_url
|
||||
|
||||
|
||||
try:
|
||||
from bs4 import BeautifulSoup
|
||||
@@ -73,6 +75,7 @@ class ScrapeWebsiteTool(BaseTool):
|
||||
if website_url is None:
|
||||
raise ValueError("Website URL must be provided.")
|
||||
|
||||
website_url = validate_url(website_url)
|
||||
page = requests.get(
|
||||
website_url,
|
||||
timeout=15,
|
||||
|
||||
@@ -5,6 +5,8 @@ from typing import Any, Literal
|
||||
from crewai.tools import BaseTool, EnvVar
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.security.safe_path import validate_url
|
||||
|
||||
|
||||
logger = logging.getLogger(__file__)
|
||||
|
||||
@@ -72,6 +74,7 @@ class ScrapflyScrapeWebsiteTool(BaseTool):
|
||||
) -> str | None:
|
||||
from scrapfly import ScrapeConfig
|
||||
|
||||
url = validate_url(url)
|
||||
scrape_config = scrape_config if scrape_config is not None else {}
|
||||
try:
|
||||
response = self.scrapfly.scrape( # type: ignore[union-attr]
|
||||
|
||||
@@ -5,6 +5,8 @@ from crewai.tools import BaseTool, EnvVar
|
||||
from pydantic import BaseModel, Field
|
||||
import requests
|
||||
|
||||
from crewai_tools.security.safe_path import validate_url
|
||||
|
||||
|
||||
class SerperScrapeWebsiteInput(BaseModel):
|
||||
"""Input schema for SerperScrapeWebsite."""
|
||||
@@ -42,6 +44,7 @@ class SerperScrapeWebsiteTool(BaseTool):
|
||||
Returns:
|
||||
Scraped website content as a string
|
||||
"""
|
||||
validate_url(url)
|
||||
try:
|
||||
# Serper API endpoint
|
||||
api_url = "https://scrape.serper.dev"
|
||||
|
||||
@@ -5,6 +5,7 @@ from crewai.tools import EnvVar
|
||||
from pydantic import BaseModel, Field
|
||||
import requests
|
||||
|
||||
from crewai_tools.security.safe_path import validate_url
|
||||
from crewai_tools.tools.rag.rag_tool import RagTool
|
||||
|
||||
|
||||
@@ -48,6 +49,7 @@ class SerplyWebpageToMarkdownTool(RagTool):
|
||||
if self.proxy_location and not self.headers.get("X-Proxy-Location"):
|
||||
self.headers["X-Proxy-Location"] = self.proxy_location
|
||||
|
||||
validate_url(url)
|
||||
data = {"url": url, "method": "GET", "response_type": "markdown"}
|
||||
response = requests.request(
|
||||
"POST",
|
||||
|
||||
@@ -7,6 +7,8 @@ from crewai.tools import BaseTool, EnvVar
|
||||
from crewai.utilities.types import LLMMessage
|
||||
from pydantic import BaseModel, Field, PrivateAttr, field_validator
|
||||
|
||||
from crewai_tools.security.safe_path import validate_file_path
|
||||
|
||||
|
||||
class ImagePromptSchema(BaseModel):
|
||||
"""Input for Vision Tool."""
|
||||
@@ -135,5 +137,6 @@ class VisionTool(BaseTool):
|
||||
Returns:
|
||||
Base64-encoded image data
|
||||
"""
|
||||
image_path = validate_file_path(image_path)
|
||||
with open(image_path, "rb") as image_file:
|
||||
return base64.b64encode(image_file.read()).decode()
|
||||
|
||||
@@ -3,6 +3,7 @@ from typing import Any
|
||||
from pydantic import BaseModel, Field
|
||||
|
||||
from crewai_tools.rag.data_types import DataType
|
||||
from crewai_tools.security.safe_path import validate_url
|
||||
from crewai_tools.tools.rag.rag_tool import RagTool
|
||||
|
||||
|
||||
@@ -37,6 +38,7 @@ class WebsiteSearchTool(RagTool):
|
||||
self._generate_description()
|
||||
|
||||
def add(self, website: str) -> None: # type: ignore[override]
|
||||
website = validate_url(website)
|
||||
super().add(website, data_type=DataType.WEBSITE)
|
||||
|
||||
def _run( # type: ignore[override]
|
||||
|
||||
10
lib/crewai-tools/src/crewai_tools/utilities/safe_path.py
Normal file
10
lib/crewai-tools/src/crewai_tools/utilities/safe_path.py
Normal file
@@ -0,0 +1,10 @@
|
||||
"""Backward-compatible re-export from crewai_tools.security.safe_path."""
|
||||
|
||||
from crewai_tools.security.safe_path import (
|
||||
validate_directory_path,
|
||||
validate_file_path,
|
||||
validate_url,
|
||||
)
|
||||
|
||||
|
||||
__all__ = ["validate_directory_path", "validate_file_path", "validate_url"]
|
||||
@@ -3,10 +3,21 @@ from tempfile import TemporaryDirectory
|
||||
from typing import cast
|
||||
from unittest.mock import MagicMock, Mock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai_tools.adapters.crewai_rag_adapter import CrewAIRagAdapter
|
||||
from crewai_tools.tools.rag.rag_tool import RagTool
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def allow_tmp_paths(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
"""Allow absolute paths outside CWD (e.g. /tmp/) for these RagTool tests.
|
||||
|
||||
Path validation is tested separately in test_rag_tool_path_validation.py.
|
||||
"""
|
||||
monkeypatch.setenv("CREWAI_TOOLS_ALLOW_UNSAFE_PATHS", "true")
|
||||
|
||||
|
||||
@patch("crewai_tools.adapters.crewai_rag_adapter.get_rag_client")
|
||||
@patch("crewai_tools.adapters.crewai_rag_adapter.create_client")
|
||||
def test_rag_tool_initialization(
|
||||
|
||||
@@ -10,6 +10,15 @@ from crewai_tools.rag.data_types import DataType
|
||||
from crewai_tools.tools.rag.rag_tool import RagTool
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def allow_tmp_paths(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
"""Allow absolute paths outside CWD (e.g. /tmp/) for these data-type tests.
|
||||
|
||||
Path validation is tested separately in test_rag_tool_path_validation.py.
|
||||
"""
|
||||
monkeypatch.setenv("CREWAI_TOOLS_ALLOW_UNSAFE_PATHS", "true")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_rag_client() -> MagicMock:
|
||||
"""Create a mock RAG client for testing."""
|
||||
|
||||
@@ -0,0 +1,80 @@
|
||||
"""Tests for path and URL validation in RagTool.add() — both positional and keyword args."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock, patch
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai_tools.tools.rag.rag_tool import RagTool
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def mock_rag_client() -> MagicMock:
|
||||
mock_client = MagicMock()
|
||||
mock_client.get_or_create_collection = MagicMock(return_value=None)
|
||||
mock_client.add_documents = MagicMock(return_value=None)
|
||||
mock_client.search = MagicMock(return_value=[])
|
||||
return mock_client
|
||||
|
||||
|
||||
@pytest.fixture()
|
||||
def tool(mock_rag_client: MagicMock) -> RagTool:
|
||||
with (
|
||||
patch("crewai_tools.adapters.crewai_rag_adapter.get_rag_client", return_value=mock_rag_client),
|
||||
patch("crewai_tools.adapters.crewai_rag_adapter.create_client", return_value=mock_rag_client),
|
||||
):
|
||||
return RagTool()
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Positional arg validation (existing behaviour, regression guard)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestPositionalArgValidation:
|
||||
def test_blocks_traversal_in_positional_arg(self, tool):
|
||||
with pytest.raises(ValueError, match="Blocked unsafe"):
|
||||
tool.add("../../etc/passwd")
|
||||
|
||||
def test_blocks_file_url_in_positional_arg(self, tool):
|
||||
with pytest.raises(ValueError, match="Blocked unsafe"):
|
||||
tool.add("file:///etc/passwd")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Keyword arg validation (the newly fixed gap)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestKwargPathValidation:
|
||||
def test_blocks_traversal_via_path_kwarg(self, tool):
|
||||
with pytest.raises(ValueError, match="Blocked unsafe path"):
|
||||
tool.add(path="../../etc/passwd")
|
||||
|
||||
def test_blocks_traversal_via_file_path_kwarg(self, tool):
|
||||
with pytest.raises(ValueError, match="Blocked unsafe file_path"):
|
||||
tool.add(file_path="/etc/passwd")
|
||||
|
||||
def test_blocks_traversal_via_directory_path_kwarg(self, tool):
|
||||
with pytest.raises(ValueError, match="Blocked unsafe directory_path"):
|
||||
tool.add(directory_path="../../sensitive_dir")
|
||||
|
||||
def test_blocks_file_url_via_url_kwarg(self, tool):
|
||||
with pytest.raises(ValueError, match="Blocked unsafe url"):
|
||||
tool.add(url="file:///etc/passwd")
|
||||
|
||||
def test_blocks_private_ip_via_url_kwarg(self, tool):
|
||||
with pytest.raises(ValueError, match="Blocked unsafe url"):
|
||||
tool.add(url="http://169.254.169.254/latest/meta-data/")
|
||||
|
||||
def test_blocks_private_ip_via_website_kwarg(self, tool):
|
||||
with pytest.raises(ValueError, match="Blocked unsafe website"):
|
||||
tool.add(website="http://192.168.1.1/")
|
||||
|
||||
def test_blocks_file_url_via_github_url_kwarg(self, tool):
|
||||
with pytest.raises(ValueError, match="Blocked unsafe github_url"):
|
||||
tool.add(github_url="file:///etc/passwd")
|
||||
|
||||
def test_blocks_file_url_via_youtube_url_kwarg(self, tool):
|
||||
with pytest.raises(ValueError, match="Blocked unsafe youtube_url"):
|
||||
tool.add(youtube_url="file:///etc/passwd")
|
||||
|
||||
@@ -23,6 +23,15 @@ from crewai_tools.tools.rag.rag_tool import Adapter
|
||||
import pytest
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
|
||||
def allow_tmp_paths(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
"""Allow absolute paths outside CWD (e.g. /tmp/) for these search-tool tests.
|
||||
|
||||
Path validation is tested separately in test_rag_tool_path_validation.py.
|
||||
"""
|
||||
monkeypatch.setenv("CREWAI_TOOLS_ALLOW_UNSAFE_PATHS", "true")
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def mock_adapter():
|
||||
mock_adapter = MagicMock(spec=Adapter)
|
||||
|
||||
0
lib/crewai-tools/tests/utilities/__init__.py
Normal file
0
lib/crewai-tools/tests/utilities/__init__.py
Normal file
170
lib/crewai-tools/tests/utilities/test_safe_path.py
Normal file
170
lib/crewai-tools/tests/utilities/test_safe_path.py
Normal file
@@ -0,0 +1,170 @@
|
||||
"""Tests for path and URL validation utilities."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
|
||||
import pytest
|
||||
|
||||
from crewai_tools.security.safe_path import (
|
||||
validate_directory_path,
|
||||
validate_file_path,
|
||||
validate_url,
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# File path validation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestValidateFilePath:
|
||||
"""Tests for validate_file_path."""
|
||||
|
||||
def test_valid_relative_path(self, tmp_path):
|
||||
"""Normal relative path within the base directory."""
|
||||
(tmp_path / "data.json").touch()
|
||||
result = validate_file_path("data.json", str(tmp_path))
|
||||
assert result == str(tmp_path / "data.json")
|
||||
|
||||
def test_valid_nested_path(self, tmp_path):
|
||||
"""Nested path within base directory."""
|
||||
(tmp_path / "sub").mkdir()
|
||||
(tmp_path / "sub" / "file.txt").touch()
|
||||
result = validate_file_path("sub/file.txt", str(tmp_path))
|
||||
assert result == str(tmp_path / "sub" / "file.txt")
|
||||
|
||||
def test_rejects_dotdot_traversal(self, tmp_path):
|
||||
"""Reject ../ traversal that escapes base_dir."""
|
||||
with pytest.raises(ValueError, match="outside the allowed directory"):
|
||||
validate_file_path("../../etc/passwd", str(tmp_path))
|
||||
|
||||
def test_rejects_absolute_path_outside_base(self, tmp_path):
|
||||
"""Reject absolute path outside base_dir."""
|
||||
with pytest.raises(ValueError, match="outside the allowed directory"):
|
||||
validate_file_path("/etc/passwd", str(tmp_path))
|
||||
|
||||
def test_allows_absolute_path_inside_base(self, tmp_path):
|
||||
"""Allow absolute path that's inside base_dir."""
|
||||
(tmp_path / "ok.txt").touch()
|
||||
result = validate_file_path(str(tmp_path / "ok.txt"), str(tmp_path))
|
||||
assert result == str(tmp_path / "ok.txt")
|
||||
|
||||
def test_rejects_symlink_escape(self, tmp_path):
|
||||
"""Reject symlinks that point outside base_dir."""
|
||||
link = tmp_path / "sneaky_link"
|
||||
# Create a symlink pointing to /etc/passwd
|
||||
os.symlink("/etc/passwd", str(link))
|
||||
with pytest.raises(ValueError, match="outside the allowed directory"):
|
||||
validate_file_path("sneaky_link", str(tmp_path))
|
||||
|
||||
def test_defaults_to_cwd(self):
|
||||
"""When no base_dir is given, use cwd."""
|
||||
cwd = os.getcwd()
|
||||
# A file in cwd should be valid
|
||||
result = validate_file_path(".", None)
|
||||
assert result == os.path.realpath(cwd)
|
||||
|
||||
def test_escape_hatch(self, tmp_path, monkeypatch):
|
||||
"""CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true bypasses validation."""
|
||||
monkeypatch.setenv("CREWAI_TOOLS_ALLOW_UNSAFE_PATHS", "true")
|
||||
# This would normally be rejected
|
||||
result = validate_file_path("/etc/passwd", str(tmp_path))
|
||||
assert result == os.path.realpath("/etc/passwd")
|
||||
|
||||
|
||||
class TestValidateDirectoryPath:
|
||||
"""Tests for validate_directory_path."""
|
||||
|
||||
def test_valid_directory(self, tmp_path):
|
||||
(tmp_path / "subdir").mkdir()
|
||||
result = validate_directory_path("subdir", str(tmp_path))
|
||||
assert result == str(tmp_path / "subdir")
|
||||
|
||||
def test_rejects_file_as_directory(self, tmp_path):
|
||||
(tmp_path / "file.txt").touch()
|
||||
with pytest.raises(ValueError, match="not a directory"):
|
||||
validate_directory_path("file.txt", str(tmp_path))
|
||||
|
||||
def test_rejects_traversal(self, tmp_path):
|
||||
with pytest.raises(ValueError, match="outside the allowed directory"):
|
||||
validate_directory_path("../../", str(tmp_path))
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# URL validation
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestValidateUrl:
|
||||
"""Tests for validate_url."""
|
||||
|
||||
def test_valid_https_url(self):
|
||||
"""Normal HTTPS URL should pass."""
|
||||
result = validate_url("https://example.com/data.json")
|
||||
assert result == "https://example.com/data.json"
|
||||
|
||||
def test_valid_http_url(self):
|
||||
"""Normal HTTP URL should pass."""
|
||||
result = validate_url("http://example.com/api")
|
||||
assert result == "http://example.com/api"
|
||||
|
||||
def test_blocks_file_scheme(self):
|
||||
"""file:// URLs must be blocked."""
|
||||
with pytest.raises(ValueError, match="file:// URLs are not allowed"):
|
||||
validate_url("file:///etc/passwd")
|
||||
|
||||
def test_blocks_file_scheme_with_host(self):
|
||||
with pytest.raises(ValueError, match="file:// URLs are not allowed"):
|
||||
validate_url("file://localhost/etc/shadow")
|
||||
|
||||
def test_blocks_localhost(self):
|
||||
"""localhost must be blocked (resolves to 127.0.0.1)."""
|
||||
with pytest.raises(ValueError, match="private/reserved IP"):
|
||||
validate_url("http://localhost/admin")
|
||||
|
||||
def test_blocks_127_0_0_1(self):
|
||||
with pytest.raises(ValueError, match="private/reserved IP"):
|
||||
validate_url("http://127.0.0.1/admin")
|
||||
|
||||
def test_blocks_cloud_metadata(self):
|
||||
"""AWS/GCP/Azure metadata endpoint must be blocked."""
|
||||
with pytest.raises(ValueError, match="private/reserved IP"):
|
||||
validate_url("http://169.254.169.254/latest/meta-data/")
|
||||
|
||||
def test_blocks_private_10_range(self):
|
||||
with pytest.raises(ValueError, match="private/reserved IP"):
|
||||
validate_url("http://10.0.0.1/internal")
|
||||
|
||||
def test_blocks_private_172_range(self):
|
||||
with pytest.raises(ValueError, match="private/reserved IP"):
|
||||
validate_url("http://172.16.0.1/internal")
|
||||
|
||||
def test_blocks_private_192_range(self):
|
||||
with pytest.raises(ValueError, match="private/reserved IP"):
|
||||
validate_url("http://192.168.1.1/router")
|
||||
|
||||
def test_blocks_zero_address(self):
|
||||
with pytest.raises(ValueError, match="private/reserved IP"):
|
||||
validate_url("http://0.0.0.0/")
|
||||
|
||||
def test_blocks_ipv6_localhost(self):
|
||||
with pytest.raises(ValueError, match="private/reserved IP"):
|
||||
validate_url("http://[::1]/admin")
|
||||
|
||||
def test_blocks_ftp_scheme(self):
|
||||
with pytest.raises(ValueError, match="not allowed"):
|
||||
validate_url("ftp://example.com/file")
|
||||
|
||||
def test_blocks_empty_hostname(self):
|
||||
with pytest.raises(ValueError, match="no hostname"):
|
||||
validate_url("http:///path")
|
||||
|
||||
def test_blocks_unresolvable_host(self):
|
||||
with pytest.raises(ValueError, match="Could not resolve"):
|
||||
validate_url("http://this-host-definitely-does-not-exist-abc123.com/")
|
||||
|
||||
def test_escape_hatch(self, monkeypatch):
|
||||
"""CREWAI_TOOLS_ALLOW_UNSAFE_PATHS=true bypasses URL validation."""
|
||||
monkeypatch.setenv("CREWAI_TOOLS_ALLOW_UNSAFE_PATHS", "true")
|
||||
# file:// would normally be blocked
|
||||
result = validate_url("file:///etc/passwd")
|
||||
assert result == "file:///etc/passwd"
|
||||
@@ -55,7 +55,7 @@ Repository = "https://github.com/crewAIInc/crewAI"
|
||||
|
||||
[project.optional-dependencies]
|
||||
tools = [
|
||||
"crewai-tools==1.14.0a3",
|
||||
"crewai-tools==1.14.0",
|
||||
]
|
||||
embeddings = [
|
||||
"tiktoken~=0.8.0"
|
||||
|
||||
@@ -46,7 +46,7 @@ def _suppress_pydantic_deprecation_warnings() -> None:
|
||||
|
||||
_suppress_pydantic_deprecation_warnings()
|
||||
|
||||
__version__ = "1.14.0a3"
|
||||
__version__ = "1.14.0"
|
||||
_telemetry_submitted = False
|
||||
|
||||
|
||||
|
||||
329
lib/crewai/src/crewai/cli/checkpoint_cli.py
Normal file
329
lib/crewai/src/crewai/cli/checkpoint_cli.py
Normal file
@@ -0,0 +1,329 @@
|
||||
"""CLI commands for inspecting checkpoint files."""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime
|
||||
import glob
|
||||
import json
|
||||
import os
|
||||
import sqlite3
|
||||
from typing import Any
|
||||
|
||||
import click
|
||||
|
||||
|
||||
_SQLITE_MAGIC = b"SQLite format 3\x00"
|
||||
|
||||
_SELECT_ALL = """
|
||||
SELECT id, created_at, json(data)
|
||||
FROM checkpoints
|
||||
ORDER BY rowid DESC
|
||||
"""
|
||||
|
||||
_SELECT_ONE = """
|
||||
SELECT id, created_at, json(data)
|
||||
FROM checkpoints
|
||||
WHERE id = ?
|
||||
"""
|
||||
|
||||
_SELECT_LATEST = """
|
||||
SELECT id, created_at, json(data)
|
||||
FROM checkpoints
|
||||
ORDER BY rowid DESC
|
||||
LIMIT 1
|
||||
"""
|
||||
|
||||
|
||||
def _is_sqlite(path: str) -> bool:
|
||||
"""Check if a file is a SQLite database by reading its magic bytes."""
|
||||
if not os.path.isfile(path):
|
||||
return False
|
||||
try:
|
||||
with open(path, "rb") as f:
|
||||
return f.read(16) == _SQLITE_MAGIC
|
||||
except OSError:
|
||||
return False
|
||||
|
||||
|
||||
def _parse_checkpoint_json(raw: str, source: str) -> dict[str, Any]:
|
||||
"""Parse checkpoint JSON into metadata dict."""
|
||||
data = json.loads(raw)
|
||||
entities = data.get("entities", [])
|
||||
nodes = data.get("event_record", {}).get("nodes", {})
|
||||
event_count = len(nodes)
|
||||
|
||||
trigger_event = None
|
||||
if nodes:
|
||||
last_node = max(
|
||||
nodes.values(),
|
||||
key=lambda n: n.get("event", {}).get("emission_sequence") or 0,
|
||||
)
|
||||
trigger_event = last_node.get("event", {}).get("type")
|
||||
|
||||
parsed_entities: list[dict[str, Any]] = []
|
||||
for entity in entities:
|
||||
tasks = entity.get("tasks", [])
|
||||
completed = sum(1 for t in tasks if t.get("output") is not None)
|
||||
info: dict[str, Any] = {
|
||||
"type": entity.get("entity_type", "unknown"),
|
||||
"name": entity.get("name"),
|
||||
"id": entity.get("id"),
|
||||
}
|
||||
if tasks:
|
||||
info["tasks_completed"] = completed
|
||||
info["tasks_total"] = len(tasks)
|
||||
info["tasks"] = [
|
||||
{
|
||||
"description": t.get("description", ""),
|
||||
"completed": t.get("output") is not None,
|
||||
}
|
||||
for t in tasks
|
||||
]
|
||||
parsed_entities.append(info)
|
||||
|
||||
return {
|
||||
"source": source,
|
||||
"event_count": event_count,
|
||||
"trigger": trigger_event,
|
||||
"entities": parsed_entities,
|
||||
}
|
||||
|
||||
|
||||
def _format_size(size: int) -> str:
|
||||
if size < 1024:
|
||||
return f"{size}B"
|
||||
if size < 1024 * 1024:
|
||||
return f"{size / 1024:.1f}KB"
|
||||
return f"{size / 1024 / 1024:.1f}MB"
|
||||
|
||||
|
||||
def _ts_from_name(name: str) -> str | None:
|
||||
"""Extract timestamp from checkpoint ID or filename."""
|
||||
stem = os.path.basename(name).split("_")[0].removesuffix(".json")
|
||||
try:
|
||||
dt = datetime.strptime(stem, "%Y%m%dT%H%M%S")
|
||||
except ValueError:
|
||||
return None
|
||||
return dt.strftime("%Y-%m-%d %H:%M:%S")
|
||||
|
||||
|
||||
def _entity_summary(entities: list[dict[str, Any]]) -> str:
|
||||
parts = []
|
||||
for ent in entities:
|
||||
etype = ent.get("type", "unknown")
|
||||
ename = ent.get("name", "")
|
||||
completed = ent.get("tasks_completed")
|
||||
total = ent.get("tasks_total")
|
||||
if completed is not None and total is not None:
|
||||
parts.append(f"{etype}:{ename} [{completed}/{total} tasks]")
|
||||
else:
|
||||
parts.append(f"{etype}:{ename}")
|
||||
return ", ".join(parts) if parts else "empty"
|
||||
|
||||
|
||||
# --- JSON directory ---
|
||||
|
||||
|
||||
def _list_json(location: str) -> list[dict[str, Any]]:
|
||||
pattern = os.path.join(location, "*.json")
|
||||
results = []
|
||||
for path in sorted(glob.glob(pattern), key=os.path.getmtime, reverse=True):
|
||||
name = os.path.basename(path)
|
||||
try:
|
||||
with open(path) as f:
|
||||
raw = f.read()
|
||||
meta = _parse_checkpoint_json(raw, source=name)
|
||||
meta["name"] = name
|
||||
meta["ts"] = _ts_from_name(name)
|
||||
meta["size"] = os.path.getsize(path)
|
||||
meta["path"] = path
|
||||
except Exception:
|
||||
meta = {"name": name, "ts": None, "size": 0, "entities": [], "source": name}
|
||||
results.append(meta)
|
||||
return results
|
||||
|
||||
|
||||
def _info_json_latest(location: str) -> dict[str, Any] | None:
|
||||
pattern = os.path.join(location, "*.json")
|
||||
files = sorted(glob.glob(pattern), key=os.path.getmtime, reverse=True)
|
||||
if not files:
|
||||
return None
|
||||
path = files[0]
|
||||
with open(path) as f:
|
||||
raw = f.read()
|
||||
meta = _parse_checkpoint_json(raw, source=os.path.basename(path))
|
||||
meta["name"] = os.path.basename(path)
|
||||
meta["ts"] = _ts_from_name(path)
|
||||
meta["size"] = os.path.getsize(path)
|
||||
meta["path"] = path
|
||||
return meta
|
||||
|
||||
|
||||
def _info_json_file(path: str) -> dict[str, Any]:
|
||||
with open(path) as f:
|
||||
raw = f.read()
|
||||
meta = _parse_checkpoint_json(raw, source=os.path.basename(path))
|
||||
meta["name"] = os.path.basename(path)
|
||||
meta["ts"] = _ts_from_name(path)
|
||||
meta["size"] = os.path.getsize(path)
|
||||
meta["path"] = path
|
||||
return meta
|
||||
|
||||
|
||||
# --- SQLite ---
|
||||
|
||||
|
||||
def _list_sqlite(db_path: str) -> list[dict[str, Any]]:
|
||||
results = []
|
||||
with sqlite3.connect(db_path) as conn:
|
||||
for row in conn.execute(_SELECT_ALL):
|
||||
checkpoint_id, created_at, raw = row
|
||||
try:
|
||||
meta = _parse_checkpoint_json(raw, source=checkpoint_id)
|
||||
meta["name"] = checkpoint_id
|
||||
meta["ts"] = _ts_from_name(checkpoint_id) or created_at
|
||||
except Exception:
|
||||
meta = {
|
||||
"name": checkpoint_id,
|
||||
"ts": created_at,
|
||||
"entities": [],
|
||||
"source": checkpoint_id,
|
||||
}
|
||||
results.append(meta)
|
||||
return results
|
||||
|
||||
|
||||
def _info_sqlite_latest(db_path: str) -> dict[str, Any] | None:
|
||||
with sqlite3.connect(db_path) as conn:
|
||||
row = conn.execute(_SELECT_LATEST).fetchone()
|
||||
if not row:
|
||||
return None
|
||||
checkpoint_id, created_at, raw = row
|
||||
meta = _parse_checkpoint_json(raw, source=checkpoint_id)
|
||||
meta["name"] = checkpoint_id
|
||||
meta["ts"] = _ts_from_name(checkpoint_id) or created_at
|
||||
meta["db"] = db_path
|
||||
return meta
|
||||
|
||||
|
||||
def _info_sqlite_id(db_path: str, checkpoint_id: str) -> dict[str, Any] | None:
|
||||
with sqlite3.connect(db_path) as conn:
|
||||
row = conn.execute(_SELECT_ONE, (checkpoint_id,)).fetchone()
|
||||
if not row:
|
||||
return None
|
||||
cid, created_at, raw = row
|
||||
meta = _parse_checkpoint_json(raw, source=cid)
|
||||
meta["name"] = cid
|
||||
meta["ts"] = _ts_from_name(cid) or created_at
|
||||
meta["db"] = db_path
|
||||
return meta
|
||||
|
||||
|
||||
# --- Public API ---
|
||||
|
||||
|
||||
def list_checkpoints(location: str) -> None:
|
||||
"""List all checkpoints at a location."""
|
||||
if _is_sqlite(location):
|
||||
entries = _list_sqlite(location)
|
||||
label = f"SQLite: {location}"
|
||||
elif os.path.isdir(location):
|
||||
entries = _list_json(location)
|
||||
label = location
|
||||
else:
|
||||
click.echo(f"Not a directory or SQLite database: {location}")
|
||||
return
|
||||
|
||||
if not entries:
|
||||
click.echo(f"No checkpoints found in {label}")
|
||||
return
|
||||
|
||||
click.echo(f"Found {len(entries)} checkpoint(s) in {label}\n")
|
||||
|
||||
for entry in entries:
|
||||
ts = entry.get("ts") or "unknown"
|
||||
name = entry.get("name", "")
|
||||
size = _format_size(entry["size"]) if "size" in entry else ""
|
||||
trigger = entry.get("trigger") or ""
|
||||
summary = _entity_summary(entry.get("entities", []))
|
||||
parts = [name, ts]
|
||||
if size:
|
||||
parts.append(size)
|
||||
if trigger:
|
||||
parts.append(trigger)
|
||||
parts.append(summary)
|
||||
click.echo(f" {' '.join(parts)}")
|
||||
|
||||
|
||||
def info_checkpoint(path: str) -> None:
|
||||
"""Show details of a single checkpoint."""
|
||||
meta: dict[str, Any] | None = None
|
||||
|
||||
# db_path#checkpoint_id format
|
||||
if "#" in path:
|
||||
db_path, checkpoint_id = path.rsplit("#", 1)
|
||||
if _is_sqlite(db_path):
|
||||
meta = _info_sqlite_id(db_path, checkpoint_id)
|
||||
if not meta:
|
||||
click.echo(f"Checkpoint not found: {checkpoint_id}")
|
||||
return
|
||||
|
||||
# SQLite file — show latest
|
||||
if meta is None and _is_sqlite(path):
|
||||
meta = _info_sqlite_latest(path)
|
||||
if not meta:
|
||||
click.echo(f"No checkpoints in database: {path}")
|
||||
return
|
||||
click.echo(f"Latest checkpoint: {meta['name']}\n")
|
||||
|
||||
# Directory — show latest JSON
|
||||
if meta is None and os.path.isdir(path):
|
||||
meta = _info_json_latest(path)
|
||||
if not meta:
|
||||
click.echo(f"No checkpoints found in {path}")
|
||||
return
|
||||
click.echo(f"Latest checkpoint: {meta['name']}\n")
|
||||
|
||||
# Specific JSON file
|
||||
if meta is None and os.path.isfile(path):
|
||||
try:
|
||||
meta = _info_json_file(path)
|
||||
except Exception as exc:
|
||||
click.echo(f"Failed to read checkpoint: {exc}")
|
||||
return
|
||||
|
||||
if meta is None:
|
||||
click.echo(f"Not found: {path}")
|
||||
return
|
||||
|
||||
_print_info(meta)
|
||||
|
||||
|
||||
def _print_info(meta: dict[str, Any]) -> None:
|
||||
ts = meta.get("ts") or "unknown"
|
||||
source = meta.get("path") or meta.get("db") or meta.get("source", "")
|
||||
click.echo(f"Source: {source}")
|
||||
click.echo(f"Name: {meta.get('name', '')}")
|
||||
click.echo(f"Time: {ts}")
|
||||
if "size" in meta:
|
||||
click.echo(f"Size: {_format_size(meta['size'])}")
|
||||
click.echo(f"Events: {meta.get('event_count', 0)}")
|
||||
trigger = meta.get("trigger")
|
||||
if trigger:
|
||||
click.echo(f"Trigger: {trigger}")
|
||||
|
||||
for ent in meta.get("entities", []):
|
||||
eid = str(ent.get("id", ""))[:8]
|
||||
click.echo(f"\n {ent['type']}: {ent.get('name', 'unnamed')} ({eid}...)")
|
||||
|
||||
tasks = ent.get("tasks")
|
||||
if isinstance(tasks, list):
|
||||
click.echo(
|
||||
f" Tasks: {ent['tasks_completed']}/{ent['tasks_total']} completed"
|
||||
)
|
||||
for i, task in enumerate(tasks):
|
||||
status = "done" if task.get("completed") else "pending"
|
||||
desc = str(task.get("description", ""))
|
||||
if len(desc) > 70:
|
||||
desc = desc[:67] + "..."
|
||||
click.echo(f" {i + 1}. [{status}] {desc}")
|
||||
@@ -609,7 +609,6 @@ def env() -> None:
|
||||
@env.command("view")
|
||||
def env_view() -> None:
|
||||
"""View tracing-related environment variables."""
|
||||
import os
|
||||
from pathlib import Path
|
||||
|
||||
from rich.console import Console
|
||||
@@ -738,7 +737,6 @@ def traces_disable() -> None:
|
||||
@traces.command("status")
|
||||
def traces_status() -> None:
|
||||
"""Show current trace collection status."""
|
||||
import os
|
||||
|
||||
from rich.console import Console
|
||||
from rich.panel import Panel
|
||||
@@ -788,5 +786,28 @@ def traces_status() -> None:
|
||||
console.print(panel)
|
||||
|
||||
|
||||
@crewai.group()
|
||||
def checkpoint() -> None:
|
||||
"""Inspect checkpoint files."""
|
||||
|
||||
|
||||
@checkpoint.command("list")
|
||||
@click.argument("location", default="./.checkpoints")
|
||||
def checkpoint_list(location: str) -> None:
|
||||
"""List checkpoints in a directory."""
|
||||
from crewai.cli.checkpoint_cli import list_checkpoints
|
||||
|
||||
list_checkpoints(location)
|
||||
|
||||
|
||||
@checkpoint.command("info")
|
||||
@click.argument("path", default="./.checkpoints")
|
||||
def checkpoint_info(path: str) -> None:
|
||||
"""Show details of a checkpoint. Pass a file or directory for latest."""
|
||||
from crewai.cli.checkpoint_cli import info_checkpoint
|
||||
|
||||
info_checkpoint(path)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
crewai()
|
||||
|
||||
@@ -5,7 +5,7 @@ description = "{{name}} using crewAI"
|
||||
authors = [{ name = "Your Name", email = "you@example.com" }]
|
||||
requires-python = ">=3.10,<3.14"
|
||||
dependencies = [
|
||||
"crewai[tools]==1.14.0a3"
|
||||
"crewai[tools]==1.14.0"
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
|
||||
@@ -5,7 +5,7 @@ description = "{{name}} using crewAI"
|
||||
authors = [{ name = "Your Name", email = "you@example.com" }]
|
||||
requires-python = ">=3.10,<3.14"
|
||||
dependencies = [
|
||||
"crewai[tools]==1.14.0a3"
|
||||
"crewai[tools]==1.14.0"
|
||||
]
|
||||
|
||||
[project.scripts]
|
||||
|
||||
@@ -5,7 +5,7 @@ description = "Power up your crews with {{folder_name}}"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10,<3.14"
|
||||
dependencies = [
|
||||
"crewai[tools]==1.14.0a3"
|
||||
"crewai[tools]==1.14.0"
|
||||
]
|
||||
|
||||
[tool.crewai]
|
||||
|
||||
@@ -165,9 +165,10 @@ class CheckpointConfig(BaseModel):
|
||||
automatically whenever the specified event(s) fire.
|
||||
"""
|
||||
|
||||
directory: str = Field(
|
||||
location: str = Field(
|
||||
default="./.checkpoints",
|
||||
description="Filesystem path where checkpoint JSON files are written.",
|
||||
description="Storage destination. For JsonProvider this is a directory "
|
||||
"path; for SqliteProvider it is a database file path.",
|
||||
)
|
||||
on_events: list[CheckpointEventType | Literal["*"]] = Field(
|
||||
default=["task_completed"],
|
||||
@@ -180,8 +181,8 @@ class CheckpointConfig(BaseModel):
|
||||
)
|
||||
max_checkpoints: int | None = Field(
|
||||
default=None,
|
||||
description="Maximum checkpoint files to keep. Oldest are pruned first. "
|
||||
"None means keep all.",
|
||||
description="Maximum checkpoints to keep. Oldest are pruned after "
|
||||
"each write. None means keep all.",
|
||||
)
|
||||
|
||||
@property
|
||||
|
||||
@@ -7,9 +7,7 @@ avoids per-event overhead when no entity uses checkpointing.
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import glob
|
||||
import logging
|
||||
import os
|
||||
import threading
|
||||
from typing import Any
|
||||
|
||||
@@ -105,29 +103,13 @@ def _find_checkpoint(source: Any) -> CheckpointConfig | None:
|
||||
|
||||
|
||||
def _do_checkpoint(state: RuntimeState, cfg: CheckpointConfig) -> None:
|
||||
"""Write a checkpoint synchronously and optionally prune old files."""
|
||||
"""Write a checkpoint and prune old ones if configured."""
|
||||
_prepare_entities(state.root)
|
||||
data = state.model_dump_json()
|
||||
cfg.provider.checkpoint(data, cfg.directory)
|
||||
cfg.provider.checkpoint(data, cfg.location)
|
||||
|
||||
if cfg.max_checkpoints is not None:
|
||||
_prune(cfg.directory, cfg.max_checkpoints)
|
||||
|
||||
|
||||
def _safe_remove(path: str) -> None:
|
||||
try:
|
||||
os.remove(path)
|
||||
except OSError:
|
||||
logger.debug("Failed to remove checkpoint file %s", path, exc_info=True)
|
||||
|
||||
|
||||
def _prune(directory: str, max_keep: int) -> None:
|
||||
"""Remove oldest checkpoint files beyond *max_keep*."""
|
||||
pattern = os.path.join(directory, "*.json")
|
||||
files = sorted(glob.glob(pattern), key=os.path.getmtime)
|
||||
to_remove = files if max_keep == 0 else files[:-max_keep]
|
||||
for path in to_remove:
|
||||
_safe_remove(path)
|
||||
cfg.provider.prune(cfg.location, cfg.max_checkpoints)
|
||||
|
||||
|
||||
def _should_checkpoint(source: Any, event: BaseEvent) -> CheckpointConfig | None:
|
||||
|
||||
@@ -34,27 +34,36 @@ class BaseProvider(Protocol):
|
||||
),
|
||||
)
|
||||
|
||||
def checkpoint(self, data: str, directory: str) -> str:
|
||||
def checkpoint(self, data: str, location: str) -> str:
|
||||
"""Persist a snapshot synchronously.
|
||||
|
||||
Args:
|
||||
data: The serialized string to persist.
|
||||
directory: Logical destination: path, bucket prefix, etc.
|
||||
location: Storage destination (directory, file path, URI, etc.).
|
||||
|
||||
Returns:
|
||||
A location identifier for the saved checkpoint, such as a file path or URI.
|
||||
A location identifier for the saved checkpoint.
|
||||
"""
|
||||
...
|
||||
|
||||
async def acheckpoint(self, data: str, directory: str) -> str:
|
||||
async def acheckpoint(self, data: str, location: str) -> str:
|
||||
"""Persist a snapshot asynchronously.
|
||||
|
||||
Args:
|
||||
data: The serialized string to persist.
|
||||
directory: Logical destination: path, bucket prefix, etc.
|
||||
location: Storage destination (directory, file path, URI, etc.).
|
||||
|
||||
Returns:
|
||||
A location identifier for the saved checkpoint, such as a file path or URI.
|
||||
A location identifier for the saved checkpoint.
|
||||
"""
|
||||
...
|
||||
|
||||
def prune(self, location: str, max_keep: int) -> None:
|
||||
"""Remove old checkpoints, keeping at most *max_keep*.
|
||||
|
||||
Args:
|
||||
location: The storage destination passed to ``checkpoint``.
|
||||
max_keep: Maximum number of checkpoints to retain.
|
||||
"""
|
||||
...
|
||||
|
||||
|
||||
@@ -3,6 +3,9 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
import glob
|
||||
import logging
|
||||
import os
|
||||
from pathlib import Path
|
||||
import uuid
|
||||
|
||||
@@ -12,43 +15,56 @@ import aiofiles.os
|
||||
from crewai.state.provider.core import BaseProvider
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class JsonProvider(BaseProvider):
|
||||
"""Persists runtime state checkpoints as JSON files on the local filesystem."""
|
||||
|
||||
def checkpoint(self, data: str, directory: str) -> str:
|
||||
"""Write a JSON checkpoint file to the directory.
|
||||
def checkpoint(self, data: str, location: str) -> str:
|
||||
"""Write a JSON checkpoint file.
|
||||
|
||||
Args:
|
||||
data: The serialized JSON string to persist.
|
||||
directory: Filesystem path where the checkpoint will be saved.
|
||||
location: Directory where the checkpoint will be saved.
|
||||
|
||||
Returns:
|
||||
The path to the written checkpoint file.
|
||||
"""
|
||||
file_path = _build_path(directory)
|
||||
file_path = _build_path(location)
|
||||
file_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
||||
with open(file_path, "w") as f:
|
||||
f.write(data)
|
||||
return str(file_path)
|
||||
|
||||
async def acheckpoint(self, data: str, directory: str) -> str:
|
||||
"""Write a JSON checkpoint file to the directory asynchronously.
|
||||
async def acheckpoint(self, data: str, location: str) -> str:
|
||||
"""Write a JSON checkpoint file asynchronously.
|
||||
|
||||
Args:
|
||||
data: The serialized JSON string to persist.
|
||||
directory: Filesystem path where the checkpoint will be saved.
|
||||
location: Directory where the checkpoint will be saved.
|
||||
|
||||
Returns:
|
||||
The path to the written checkpoint file.
|
||||
"""
|
||||
file_path = _build_path(directory)
|
||||
file_path = _build_path(location)
|
||||
await aiofiles.os.makedirs(str(file_path.parent), exist_ok=True)
|
||||
|
||||
async with aiofiles.open(file_path, "w") as f:
|
||||
await f.write(data)
|
||||
return str(file_path)
|
||||
|
||||
def prune(self, location: str, max_keep: int) -> None:
|
||||
"""Remove oldest checkpoint files beyond *max_keep*."""
|
||||
pattern = os.path.join(location, "*.json")
|
||||
files = sorted(glob.glob(pattern), key=os.path.getmtime)
|
||||
for path in files if max_keep == 0 else files[:-max_keep]:
|
||||
try:
|
||||
os.remove(path)
|
||||
except OSError: # noqa: PERF203
|
||||
logger.debug("Failed to remove %s", path, exc_info=True)
|
||||
|
||||
def from_checkpoint(self, location: str) -> str:
|
||||
"""Read a JSON checkpoint file.
|
||||
|
||||
|
||||
@@ -43,58 +43,53 @@ def _make_id() -> tuple[str, str]:
|
||||
class SqliteProvider(BaseProvider):
|
||||
"""Persists runtime state checkpoints in a SQLite database.
|
||||
|
||||
The ``directory`` argument to ``checkpoint`` / ``acheckpoint`` is
|
||||
used as the database path (e.g. ``"./.checkpoints.db"``).
|
||||
|
||||
Args:
|
||||
max_checkpoints: Maximum number of checkpoints to retain.
|
||||
Oldest rows are pruned after each write. None keeps all.
|
||||
The ``location`` argument to ``checkpoint`` / ``acheckpoint`` is
|
||||
used as the database file path.
|
||||
"""
|
||||
|
||||
def __init__(self, max_checkpoints: int | None = None) -> None:
|
||||
self.max_checkpoints = max_checkpoints
|
||||
|
||||
def checkpoint(self, data: str, directory: str) -> str:
|
||||
def checkpoint(self, data: str, location: str) -> str:
|
||||
"""Write a checkpoint to the SQLite database.
|
||||
|
||||
Args:
|
||||
data: The serialized JSON string to persist.
|
||||
directory: Path to the SQLite database file.
|
||||
location: Path to the SQLite database file.
|
||||
|
||||
Returns:
|
||||
A location string in the format ``"db_path#checkpoint_id"``.
|
||||
"""
|
||||
checkpoint_id, ts = _make_id()
|
||||
Path(directory).parent.mkdir(parents=True, exist_ok=True)
|
||||
with sqlite3.connect(directory) as conn:
|
||||
Path(location).parent.mkdir(parents=True, exist_ok=True)
|
||||
with sqlite3.connect(location) as conn:
|
||||
conn.execute("PRAGMA journal_mode=WAL")
|
||||
conn.execute(_CREATE_TABLE)
|
||||
conn.execute(_INSERT, (checkpoint_id, ts, data))
|
||||
if self.max_checkpoints is not None:
|
||||
conn.execute(_PRUNE, (self.max_checkpoints,))
|
||||
conn.commit()
|
||||
return f"{directory}#{checkpoint_id}"
|
||||
return f"{location}#{checkpoint_id}"
|
||||
|
||||
async def acheckpoint(self, data: str, directory: str) -> str:
|
||||
async def acheckpoint(self, data: str, location: str) -> str:
|
||||
"""Write a checkpoint to the SQLite database asynchronously.
|
||||
|
||||
Args:
|
||||
data: The serialized JSON string to persist.
|
||||
directory: Path to the SQLite database file.
|
||||
location: Path to the SQLite database file.
|
||||
|
||||
Returns:
|
||||
A location string in the format ``"db_path#checkpoint_id"``.
|
||||
"""
|
||||
checkpoint_id, ts = _make_id()
|
||||
Path(directory).parent.mkdir(parents=True, exist_ok=True)
|
||||
async with aiosqlite.connect(directory) as db:
|
||||
Path(location).parent.mkdir(parents=True, exist_ok=True)
|
||||
async with aiosqlite.connect(location) as db:
|
||||
await db.execute("PRAGMA journal_mode=WAL")
|
||||
await db.execute(_CREATE_TABLE)
|
||||
await db.execute(_INSERT, (checkpoint_id, ts, data))
|
||||
if self.max_checkpoints is not None:
|
||||
await db.execute(_PRUNE, (self.max_checkpoints,))
|
||||
await db.commit()
|
||||
return f"{directory}#{checkpoint_id}"
|
||||
return f"{location}#{checkpoint_id}"
|
||||
|
||||
def prune(self, location: str, max_keep: int) -> None:
|
||||
"""Remove oldest checkpoint rows beyond *max_keep*."""
|
||||
with sqlite3.connect(location) as conn:
|
||||
conn.execute(_PRUNE, (max_keep,))
|
||||
conn.commit()
|
||||
|
||||
def from_checkpoint(self, location: str) -> str:
|
||||
"""Read a checkpoint from the SQLite database.
|
||||
|
||||
@@ -90,29 +90,31 @@ class RuntimeState(RootModel): # type: ignore[type-arg]
|
||||
return state
|
||||
return handler(data)
|
||||
|
||||
def checkpoint(self, directory: str) -> str:
|
||||
"""Write a checkpoint file to the directory.
|
||||
def checkpoint(self, location: str) -> str:
|
||||
"""Write a checkpoint.
|
||||
|
||||
Args:
|
||||
directory: Filesystem path where the checkpoint JSON will be saved.
|
||||
location: Storage destination. For JsonProvider this is a directory
|
||||
path; for SqliteProvider it is a database file path.
|
||||
|
||||
Returns:
|
||||
A location identifier for the saved checkpoint.
|
||||
"""
|
||||
_prepare_entities(self.root)
|
||||
return self._provider.checkpoint(self.model_dump_json(), directory)
|
||||
return self._provider.checkpoint(self.model_dump_json(), location)
|
||||
|
||||
async def acheckpoint(self, directory: str) -> str:
|
||||
async def acheckpoint(self, location: str) -> str:
|
||||
"""Async version of :meth:`checkpoint`.
|
||||
|
||||
Args:
|
||||
directory: Filesystem path where the checkpoint JSON will be saved.
|
||||
location: Storage destination. For JsonProvider this is a directory
|
||||
path; for SqliteProvider it is a database file path.
|
||||
|
||||
Returns:
|
||||
A location identifier for the saved checkpoint.
|
||||
"""
|
||||
_prepare_entities(self.root)
|
||||
return await self._provider.acheckpoint(self.model_dump_json(), directory)
|
||||
return await self._provider.acheckpoint(self.model_dump_json(), location)
|
||||
|
||||
@classmethod
|
||||
def from_checkpoint(
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
import asyncio
|
||||
import concurrent.futures
|
||||
from collections.abc import Coroutine
|
||||
import concurrent.futures
|
||||
import contextvars
|
||||
import inspect
|
||||
from typing import Any
|
||||
|
||||
@@ -17,10 +17,10 @@ from crewai.flow.flow import Flow, start
|
||||
from crewai.state.checkpoint_config import CheckpointConfig
|
||||
from crewai.state.checkpoint_listener import (
|
||||
_find_checkpoint,
|
||||
_prune,
|
||||
_resolve,
|
||||
_SENTINEL,
|
||||
)
|
||||
from crewai.state.provider.json_provider import JsonProvider
|
||||
from crewai.task import Task
|
||||
|
||||
|
||||
@@ -37,10 +37,10 @@ class TestResolve:
|
||||
def test_true_returns_config(self) -> None:
|
||||
result = _resolve(True)
|
||||
assert isinstance(result, CheckpointConfig)
|
||||
assert result.directory == "./.checkpoints"
|
||||
assert result.location == "./.checkpoints"
|
||||
|
||||
def test_config_returns_config(self) -> None:
|
||||
cfg = CheckpointConfig(directory="/tmp/cp")
|
||||
cfg = CheckpointConfig(location="/tmp/cp")
|
||||
assert _resolve(cfg) is cfg
|
||||
|
||||
|
||||
@@ -77,12 +77,12 @@ class TestFindCheckpoint:
|
||||
|
||||
def test_agent_config_overrides_crew(self) -> None:
|
||||
a = self._make_agent(
|
||||
checkpoint=CheckpointConfig(directory="/agent_cp")
|
||||
checkpoint=CheckpointConfig(location="/agent_cp")
|
||||
)
|
||||
self._make_crew([a], checkpoint=True)
|
||||
cfg = _find_checkpoint(a)
|
||||
assert isinstance(cfg, CheckpointConfig)
|
||||
assert cfg.directory == "/agent_cp"
|
||||
assert cfg.location == "/agent_cp"
|
||||
|
||||
def test_task_inherits_from_crew(self) -> None:
|
||||
a = self._make_agent()
|
||||
@@ -123,7 +123,7 @@ class TestPrune:
|
||||
# Ensure distinct mtime
|
||||
time.sleep(0.01)
|
||||
|
||||
_prune(d, max_keep=2)
|
||||
JsonProvider().prune(d, max_keep=2)
|
||||
remaining = os.listdir(d)
|
||||
assert len(remaining) == 2
|
||||
assert "cp_3.json" in remaining
|
||||
@@ -135,7 +135,7 @@ class TestPrune:
|
||||
with open(os.path.join(d, f"cp_{i}.json"), "w") as f:
|
||||
f.write("{}")
|
||||
|
||||
_prune(d, max_keep=0)
|
||||
JsonProvider().prune(d, max_keep=0)
|
||||
assert os.listdir(d) == []
|
||||
|
||||
def test_prune_more_than_existing(self) -> None:
|
||||
@@ -143,7 +143,7 @@ class TestPrune:
|
||||
with open(os.path.join(d, "cp.json"), "w") as f:
|
||||
f.write("{}")
|
||||
|
||||
_prune(d, max_keep=10)
|
||||
JsonProvider().prune(d, max_keep=10)
|
||||
assert len(os.listdir(d)) == 1
|
||||
|
||||
|
||||
@@ -153,7 +153,7 @@ class TestPrune:
|
||||
class TestCheckpointConfig:
|
||||
def test_defaults(self) -> None:
|
||||
cfg = CheckpointConfig()
|
||||
assert cfg.directory == "./.checkpoints"
|
||||
assert cfg.location == "./.checkpoints"
|
||||
assert cfg.on_events == ["task_completed"]
|
||||
assert cfg.max_checkpoints is None
|
||||
assert not cfg.trigger_all
|
||||
|
||||
@@ -1,3 +1,3 @@
|
||||
"""CrewAI development tools."""
|
||||
|
||||
__version__ = "1.14.0a3"
|
||||
__version__ = "1.14.0"
|
||||
|
||||
18
uv.lock
generated
18
uv.lock
generated
@@ -13,7 +13,7 @@ resolution-markers = [
|
||||
]
|
||||
|
||||
[options]
|
||||
exclude-newer = "2026-04-03T16:45:28.209407Z"
|
||||
exclude-newer = "2026-04-04T15:11:41.651093Z"
|
||||
exclude-newer-span = "P3D"
|
||||
|
||||
[manifest]
|
||||
@@ -1400,7 +1400,6 @@ source = { editable = "lib/crewai-tools" }
|
||||
dependencies = [
|
||||
{ name = "beautifulsoup4" },
|
||||
{ name = "crewai" },
|
||||
{ name = "docker" },
|
||||
{ name = "pymupdf" },
|
||||
{ name = "python-docx" },
|
||||
{ name = "pytube" },
|
||||
@@ -1537,7 +1536,6 @@ requires-dist = [
|
||||
{ name = "crewai", editable = "lib/crewai" },
|
||||
{ name = "cryptography", marker = "extra == 'snowflake'", specifier = ">=43.0.3" },
|
||||
{ name = "databricks-sdk", marker = "extra == 'databricks-sdk'", specifier = ">=0.46.0" },
|
||||
{ name = "docker", specifier = "~=7.1.0" },
|
||||
{ name = "exa-py", marker = "extra == 'exa-py'", specifier = ">=1.8.7" },
|
||||
{ name = "firecrawl-py", marker = "extra == 'firecrawl-py'", specifier = ">=1.8.0" },
|
||||
{ name = "gitpython", marker = "extra == 'github'", specifier = ">=3.1.41,<4" },
|
||||
@@ -1820,20 +1818,6 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/ba/5a/18ad964b0086c6e62e2e7500f7edc89e3faa45033c71c1893d34eed2b2de/dnspython-2.8.0-py3-none-any.whl", hash = "sha256:01d9bbc4a2d76bf0db7c1f729812ded6d912bd318d3b1cf81d30c0f845dbf3af", size = 331094, upload-time = "2025-09-07T18:57:58.071Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "docker"
|
||||
version = "7.1.0"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
dependencies = [
|
||||
{ name = "pywin32", marker = "sys_platform == 'win32'" },
|
||||
{ name = "requests" },
|
||||
{ name = "urllib3" },
|
||||
]
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/91/9b/4a2ea29aeba62471211598dac5d96825bb49348fa07e906ea930394a83ce/docker-7.1.0.tar.gz", hash = "sha256:ad8c70e6e3f8926cb8a92619b832b4ea5299e2831c14284663184e200546fa6c", size = 117834, upload-time = "2024-05-23T11:13:57.216Z" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/e3/26/57c6fb270950d476074c087527a558ccb6f4436657314bfb6cdf484114c4/docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0", size = 147774, upload-time = "2024-05-23T11:13:55.01Z" },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "docling"
|
||||
version = "2.75.0"
|
||||
|
||||
Reference in New Issue
Block a user