mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-11 09:08:31 +00:00
feat: implement sequential chunk-based file analysis with agent memory aggregation
- Add ChunkBasedTask class extending Task for large file processing - Implement file chunking with configurable size and overlap - Add sequential chunk processing with memory integration - Include result aggregation and summarization capabilities - Add comprehensive tests and example usage - Resolves #3144 Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
57
examples/chunk_based_analysis_example.py
Normal file
57
examples/chunk_based_analysis_example.py
Normal file
@@ -0,0 +1,57 @@
"""
Example: Sequential Chunk-Based File Analysis with CrewAI

This example demonstrates how to use ChunkBasedTask to analyze large files
by processing them in chunks with agent memory aggregation.
"""

from crewai import Agent, Crew
from crewai.tasks.chunk_based_task import ChunkBasedTask
def main(file_path: str = "path/to/your/large_document.txt") -> None:
    """Run a sequential chunk-based analysis of one document.

    Builds a single-agent crew around a ChunkBasedTask, which splits the
    file at ``file_path`` into overlapping chunks, analyzes each chunk in
    order (crew memory carries context between chunks), and aggregates the
    per-chunk results into one final report.

    Args:
        file_path: Path of the document to analyze. Defaults to a
            placeholder path — replace it with a real file before running.
    """
    # The single agent that processes every chunk; its persona steers the
    # style and depth of the per-chunk analysis.
    document_analyzer = Agent(
        role="Document Analyzer",
        goal="Analyze documents thoroughly and extract key insights",
        backstory="""You are an expert document analyst with years of experience
in processing and understanding complex documents. You excel at identifying
patterns, themes, and important information across large texts."""
    )

    # chunk_overlap keeps 200 characters of context shared between
    # consecutive 4000-character chunks so ideas spanning a chunk boundary
    # are not lost; aggregation_prompt drives the final synthesis step.
    analysis_task = ChunkBasedTask(
        description="""Analyze the provided document and identify:
1. Main themes and topics
2. Key arguments or points made
3. Important facts or data mentioned
4. Overall structure and organization""",
        expected_output="""A comprehensive analysis report containing:
- Summary of main themes
- List of key points
- Notable facts and data
- Assessment of document structure""",
        file_path=file_path,
        chunk_size=4000,
        chunk_overlap=200,
        aggregation_prompt="""Synthesize the analysis from all document chunks into
a cohesive report that captures the document's essence while highlighting
the most important insights discovered."""
    )

    # memory=True is what lets insights accumulate across sequential chunks.
    crew = Crew(
        agents=[document_analyzer],
        tasks=[analysis_task],
        memory=True,
        verbose=True
    )

    result = crew.kickoff()

    print("Analysis Complete!")
    print("Final Result:", result)

    # Per-chunk results remain available on the task after kickoff for
    # inspection or further processing.
    chunk_results = analysis_task.get_chunk_results()
    print(f"Processed {len(chunk_results)} chunks")
# Run the example only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Reference in New Issue
Block a user