mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-05-03 16:22:49 +00:00
Merge commit 'e16606672afab6c257010ce4a0ff1614740aa096' as 'packages/tools'
This commit is contained in:
20
packages/tools/crewai_tools/rag/chunkers/web_chunker.py
Normal file
20
packages/tools/crewai_tools/rag/chunkers/web_chunker.py
Normal file
@@ -0,0 +1,20 @@
|
||||
from crewai_tools.rag.chunkers.base_chunker import BaseChunker
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
class WebsiteChunker(BaseChunker):
    """Chunker preconfigured with separators suited to website text.

    All splitting behaviour comes from ``BaseChunker``; this subclass only
    chooses the default sizes and the fallback separator list.
    """

    def __init__(
        self,
        chunk_size: int = 2500,
        chunk_overlap: int = 250,
        separators: Optional[List[str]] = None,
        keep_separator: bool = True,
    ):
        """Configure the chunker and delegate to ``BaseChunker.__init__``.

        Args:
            chunk_size: Forwarded unchanged to the base class (default 2500).
            chunk_overlap: Forwarded unchanged to the base class (default 250).
            separators: Custom separator strings. When ``None``, the
                web-oriented defaults defined below are used instead.
            keep_separator: Forwarded unchanged to the base class.
        """
        # Listed from the coarsest boundary to the finest one.
        # NOTE(review): presumably the base class tries them in this order --
        # confirm against BaseChunker.
        web_defaults = [
            "\n\n\n",  # Major section breaks
            "\n\n",  # Paragraph breaks
            "\n",  # Line breaks
            ". ",  # Sentence endings
            "! ",  # Exclamation endings
            "? ",  # Question endings
            "; ",  # Semicolon breaks
            ", ",  # Comma breaks
            " ",  # Word breaks
            "",  # Character level
        ]
        # Only an explicit None selects the defaults; any list the caller
        # supplies (even an empty one) is passed through untouched.
        effective_separators = web_defaults if separators is None else separators
        super().__init__(
            chunk_size, chunk_overlap, effective_separators, keep_separator
        )
|
||||
Reference in New Issue
Block a user