feat: replace embedchain with native crewai adapter (#451)

- Remove embedchain adapter; add crewai rag adapter and update all search tools  
- Add loaders: PDF, YouTube (video & channel), GitHub, docs site, MySQL, PostgreSQL  
- Add configurable similarity threshold, limit params, and embedding_model support  
- Improve chromadb compatibility (sanitize metadata, convert columns, fix chunking)  
- Fix XML encoding, Python 3.10 issues, and YouTube URL spoofing  
- Update crewai dependency and instructions; refresh uv.lock  
- Update tests for new rag adapter and search params
This commit is contained in:
Greyson LaLonde
2025-09-18 19:02:22 -04:00
committed by GitHub
parent 8d9cee45f2
commit e29ca9ec28
33 changed files with 1317 additions and 277 deletions

View File

@@ -112,7 +112,10 @@ class RecursiveCharacterTextSplitter:
if separator == "":
doc = "".join(current_doc)
else:
doc = separator.join(current_doc)
if self._keep_separator and separator == " ":
doc = "".join(current_doc)
else:
doc = separator.join(current_doc)
if doc:
docs.append(doc)
@@ -133,7 +136,10 @@ class RecursiveCharacterTextSplitter:
if separator == "":
doc = "".join(current_doc)
else:
doc = separator.join(current_doc)
if self._keep_separator and separator == " ":
doc = "".join(current_doc)
else:
doc = separator.join(current_doc)
if doc:
docs.append(doc)