mirror of
https://github.com/crewAIInc/crewAI.git
synced 2026-01-10 08:38:30 +00:00
fix: allow adding RAG source content from valid URLs (#3831)
Some checks failed
Some checks failed
This commit is contained in:
@@ -229,6 +229,7 @@ class CrewAIRagAdapter(Adapter):
|
|||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
metadata: dict[str, Any] = base_metadata.copy()
|
metadata: dict[str, Any] = base_metadata.copy()
|
||||||
|
source_content = SourceContent(source_ref)
|
||||||
|
|
||||||
if data_type in [
|
if data_type in [
|
||||||
DataType.PDF_FILE,
|
DataType.PDF_FILE,
|
||||||
@@ -239,13 +240,12 @@ class CrewAIRagAdapter(Adapter):
|
|||||||
DataType.XML,
|
DataType.XML,
|
||||||
DataType.MDX,
|
DataType.MDX,
|
||||||
]:
|
]:
|
||||||
if not os.path.isfile(source_ref):
|
if not source_content.is_url() and not source_content.path_exists():
|
||||||
raise FileNotFoundError(f"File does not exist: {source_ref}")
|
raise FileNotFoundError(f"File does not exist: {source_ref}")
|
||||||
|
|
||||||
loader = data_type.get_loader()
|
loader = data_type.get_loader()
|
||||||
chunker = data_type.get_chunker()
|
chunker = data_type.get_chunker()
|
||||||
|
|
||||||
source_content = SourceContent(source_ref)
|
|
||||||
loader_result: LoaderResult = loader.load(source_content)
|
loader_result: LoaderResult = loader.load(source_content)
|
||||||
|
|
||||||
chunks = chunker.chunk(loader_result.content)
|
chunks = chunker.chunk(loader_result.content)
|
||||||
|
|||||||
Reference in New Issue
Block a user