Files
crewAI/lib/tools/src/crewai_tools/rag/chunkers/web_chunker.py
Greyson LaLonde e2270456c4 feat: add crewai-tools library to workspace
- Migrate crewai-tools as standalone package in lib/tools
- Configure UV workspace for monorepo structure
- Move assets to repository root
- Clean up duplicate README files
- Focus pre-commit hooks on lib/crewai/src only
2025-09-26 15:05:41 -04:00

21 lines
881 B
Python

from crewai_tools.rag.chunkers.base_chunker import BaseChunker
from typing import List, Optional
class WebsiteChunker(BaseChunker):
    """Chunker preconfigured with a separator list suited to website text.

    The class only supplies default constructor arguments; all remaining
    behavior comes from ``BaseChunker``.
    """

    def __init__(
        self,
        chunk_size: int = 2500,
        chunk_overlap: int = 250,
        separators: Optional[List[str]] = None,
        keep_separator: bool = True,
    ):
        """Set up the chunker, falling back to the built-in separator list.

        Args:
            chunk_size: Forwarded unchanged to ``BaseChunker``.
            chunk_overlap: Forwarded unchanged to ``BaseChunker``.
            separators: Custom separator strings. When ``None``, the default
                list below (ordered from coarsest to finest boundary) is
                used. An explicitly passed empty list is kept as-is.
            keep_separator: Forwarded unchanged to ``BaseChunker``.
        """
        # A fresh list is built on every call so no instance shares a
        # mutable default with another.
        website_defaults = [
            "\n\n\n",  # major section break
            "\n\n",  # paragraph break
            "\n",  # line break
            ". ",  # end of sentence
            "! ",  # end of exclamation
            "? ",  # end of question
            "; ",  # semicolon boundary
            ", ",  # comma boundary
            " ",  # word boundary
            "",  # single-character fallback
        ]
        chosen_separators = website_defaults if separators is None else separators

        # Arguments stay positional, exactly as the base constructor was
        # originally invoked.
        super().__init__(chunk_size, chunk_overlap, chosen_separators, keep_separator)