feat: restructure project as UV workspace with crewai in lib/

2026-01-14 10:38:29 +00:00 · 2025-09-26 14:29:28 -04:00
parent 74b5c88834
commit daf6f679ff
763 changed files with 1181 additions and 398 deletions
--- a/tests/knowledge/init.py
+++ b/tests/knowledge/init.py
--- a/tests/knowledge/crewai_quickstart.pdf
+++ b/tests/knowledge/crewai_quickstart.pdf
--- a/tests/knowledge/test_knowledge.py
+++ b/tests/knowledge/test_knowledge.py
@@ -1,604 +0,0 @@
-"""Test Knowledge creation and querying functionality."""
-
-from pathlib import Path
-from unittest.mock import patch
-
-import pytest
-
-from crewai.knowledge.source.crew_docling_source import CrewDoclingSource
-from crewai.knowledge.source.csv_knowledge_source import CSVKnowledgeSource
-from crewai.knowledge.source.excel_knowledge_source import ExcelKnowledgeSource
-from crewai.knowledge.source.json_knowledge_source import JSONKnowledgeSource
-from crewai.knowledge.source.pdf_knowledge_source import PDFKnowledgeSource
-from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
-from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource
-
-
-@pytest.fixture(autouse=True)
-def mock_vector_db():
-    """Mock vector database operations."""
-    with patch("crewai.knowledge.storage.knowledge_storage.KnowledgeStorage") as mock:
-        # Mock the query method to return a predefined response
-        instance = mock.return_value
-        instance.query.return_value = [
-            {
-                "content": "Brandon's favorite color is blue and he likes Mexican food.",
-                "score": 0.9,
-            }
-        ]
-        instance.reset.return_value = None
-        yield instance
-
-
-@pytest.fixture(autouse=True)
-def reset_knowledge_storage(mock_vector_db):
-    """Fixture to reset knowledge storage before each test."""
-    yield
-
-
-def test_single_short_string(mock_vector_db):
-    # Create a knowledge base with a single short string
-    content = "Brandon's favorite color is blue and he likes Mexican food."
-    string_source = StringKnowledgeSource(
-        content=content, metadata={"preference": "personal"}
-    )
-    mock_vector_db.sources = [string_source]
-    mock_vector_db.query.return_value = [{"content": content, "score": 0.9}]
-    # Perform a query
-    query = "What is Brandon's favorite color?"
-    results = mock_vector_db.query(query)
-
-    # Assert that the results contain the expected information
-    assert any("blue" in result["content"].lower() for result in results)
-    # Verify the mock was called
-    mock_vector_db.query.assert_called_once()
-
-
-# @pytest.mark.vcr(filter_headers=["authorization"])
-def test_single_2k_character_string(mock_vector_db):
-    # Create a 2k character string with various facts about Brandon
-    content = (
-        "Brandon is a software engineer who lives in San Francisco. "
-        "He enjoys hiking and often visits the trails in the Bay Area. "
-        "Brandon has a pet dog named Max, who is a golden retriever. "
-        "He loves reading science fiction books, and his favorite author is Isaac Asimov. "
-        "Brandon's favorite movie is Inception, and he enjoys watching it with his friends. "
-        "He is also a fan of Mexican cuisine, especially tacos and burritos. "
-        "Brandon plays the guitar and often performs at local open mic nights. "
-        "He is learning French and plans to visit Paris next year. "
-        "Brandon is passionate about technology and often attends tech meetups in the city. "
-        "He is also interested in AI and machine learning, and he is currently working on a project related to natural language processing. "
-        "Brandon's favorite color is blue, and he often wears blue shirts. "
-        "He enjoys cooking and often tries new recipes on weekends. "
-        "Brandon is a morning person and likes to start his day with a run in the park. "
-        "He is also a coffee enthusiast and enjoys trying different coffee blends. "
-        "Brandon is a member of a local book club and enjoys discussing books with fellow members. "
-        "He is also a fan of board games and often hosts game nights at his place. "
-        "Brandon is an advocate for environmental conservation and volunteers for local clean-up drives. "
-        "He is also a mentor for aspiring software developers and enjoys sharing his knowledge with others. "
-        "Brandon's favorite sport is basketball, and he often plays with his friends on weekends. "
-        "He is also a fan of the Golden State Warriors and enjoys watching their games. "
-    )
-    string_source = StringKnowledgeSource(
-        content=content, metadata={"preference": "personal"}
-    )
-    mock_vector_db.sources = [string_source]
-    mock_vector_db.query.return_value = [{"content": content, "score": 0.9}]
-
-    # Perform a query
-    query = "What is Brandon's favorite movie?"
-    results = mock_vector_db.query(query)
-
-    # Assert that the results contain the expected information
-    assert any("inception" in result["content"].lower() for result in results)
-    mock_vector_db.query.assert_called_once()
-
-
-def test_multiple_short_strings(mock_vector_db):
-    # Create multiple short string sources
-    contents = [
-        "Brandon loves hiking.",
-        "Brandon has a dog named Max.",
-        "Brandon enjoys painting landscapes.",
-    ]
-    string_sources = [
-        StringKnowledgeSource(content=content, metadata={"preference": "personal"})
-        for content in contents
-    ]
-
-    # Mock the vector db query response
-    mock_vector_db.query.return_value = [
-        {"content": "Brandon has a dog named Max.", "score": 0.9}
-    ]
-
-    mock_vector_db.sources = string_sources
-
-    # Perform a query
-    query = "What is the name of Brandon's pet?"
-    results = mock_vector_db.query(query)
-
-    # Assert that the correct information is retrieved
-    assert any("max" in result["content"].lower() for result in results)
-    # Verify the mock was called
-    mock_vector_db.query.assert_called_once()
-
-
-def test_multiple_2k_character_strings(mock_vector_db):
-    # Create multiple 2k character strings with various facts about Brandon
-    contents = [
-        (
-            "Brandon is a software engineer who lives in San Francisco. "
-            "He enjoys hiking and often visits the trails in the Bay Area. "
-            "Brandon has a pet dog named Max, who is a golden retriever. "
-            "He loves reading science fiction books, and his favorite author is Isaac Asimov. "
-            "Brandon's favorite movie is Inception, and he enjoys watching it with his friends. "
-            "He is also a fan of Mexican cuisine, especially tacos and burritos. "
-            "Brandon plays the guitar and often performs at local open mic nights. "
-            "He is learning French and plans to visit Paris next year. "
-            "Brandon is passionate about technology and often attends tech meetups in the city. "
-            "He is also interested in AI and machine learning, and he is currently working on a project related to natural language processing. "
-            "Brandon's favorite color is blue, and he often wears blue shirts. "
-            "He enjoys cooking and often tries new recipes on weekends. "
-            "Brandon is a morning person and likes to start his day with a run in the park. "
-            "He is also a coffee enthusiast and enjoys trying different coffee blends. "
-            "Brandon is a member of a local book club and enjoys discussing books with fellow members. "
-            "He is also a fan of board games and often hosts game nights at his place. "
-            "Brandon is an advocate for environmental conservation and volunteers for local clean-up drives. "
-            "He is also a mentor for aspiring software developers and enjoys sharing his knowledge with others. "
-            "Brandon's favorite sport is basketball, and he often plays with his friends on weekends. "
-            "He is also a fan of the Golden State Warriors and enjoys watching their games. "
-        )
-        * 2,  # Repeat to ensure it's 2k characters
-        (
-            "Brandon loves traveling and has visited over 20 countries. "
-            "He is fluent in Spanish and often practices with his friends. "
-            "Brandon's favorite city is Barcelona, where he enjoys the architecture and culture. "
-            "He is a foodie and loves trying new cuisines, with a particular fondness for sushi. "
-            "Brandon is an avid cyclist and participates in local cycling events. "
-            "He is also a photographer and enjoys capturing landscapes and cityscapes. "
-            "Brandon is a tech enthusiast and follows the latest trends in gadgets and software. "
-            "He is also a fan of virtual reality and owns a VR headset. "
-            "Brandon's favorite book is 'The Hitchhiker's Guide to the Galaxy'. "
-            "He enjoys watching documentaries and learning about history and science. "
-            "Brandon is a coffee lover and has a collection of coffee mugs from different countries. "
-            "He is also a fan of jazz music and often attends live performances. "
-            "Brandon is a member of a local running club and participates in marathons. "
-            "He is also a volunteer at a local animal shelter and helps with dog walking. "
-            "Brandon's favorite holiday is Christmas, and he enjoys decorating his home. "
-            "He is also a fan of classic movies and has a collection of DVDs. "
-            "Brandon is a mentor for young professionals and enjoys giving career advice. "
-            "He is also a fan of puzzles and enjoys solving them in his free time. "
-            "Brandon's favorite sport is soccer, and he often plays with his friends. "
-            "He is also a fan of FC Barcelona and enjoys watching their matches. "
-        )
-        * 2,  # Repeat to ensure it's 2k characters
-    ]
-    string_sources = [
-        StringKnowledgeSource(content=content, metadata={"preference": "personal"})
-        for content in contents
-    ]
-
-    mock_vector_db.sources = string_sources
-    mock_vector_db.query.return_value = [{"content": contents[1], "score": 0.9}]
-
-    # Perform a query
-    query = "What is Brandon's favorite book?"
-    results = mock_vector_db.query(query)
-
-    # Assert that the correct information is retrieved
-    assert any(
-        "the hitchhiker's guide to the galaxy" in result["content"].lower()
-        for result in results
-    )
-    mock_vector_db.query.assert_called_once()
-
-
-def test_single_short_file(mock_vector_db, tmpdir):
-    # Create a single short text file
-    content = "Brandon's favorite sport is basketball."
-    file_path = Path(tmpdir.join("short_file.txt"))
-    with open(file_path, "w") as f:
-        f.write(content)
-
-    file_source = TextFileKnowledgeSource(
-        file_paths=[file_path], metadata={"preference": "personal"}
-    )
-    mock_vector_db.sources = [file_source]
-    mock_vector_db.query.return_value = [{"content": content, "score": 0.9}]
-    # Perform a query
-    query = "What sport does Brandon like?"
-    results = mock_vector_db.query(query)
-
-    # Assert that the results contain the expected information
-    assert any("basketball" in result["content"].lower() for result in results)
-    mock_vector_db.query.assert_called_once()
-
-
-def test_single_2k_character_file(mock_vector_db, tmpdir):
-    # Create a single 2k character text file with various facts about Brandon
-    content = (
-        "Brandon is a software engineer who lives in San Francisco. "
-        "He enjoys hiking and often visits the trails in the Bay Area. "
-        "Brandon has a pet dog named Max, who is a golden retriever. "
-        "He loves reading science fiction books, and his favorite author is Isaac Asimov. "
-        "Brandon's favorite movie is Inception, and he enjoys watching it with his friends. "
-        "He is also a fan of Mexican cuisine, especially tacos and burritos. "
-        "Brandon plays the guitar and often performs at local open mic nights. "
-        "He is learning French and plans to visit Paris next year. "
-        "Brandon is passionate about technology and often attends tech meetups in the city. "
-        "He is also interested in AI and machine learning, and he is currently working on a project related to natural language processing. "
-        "Brandon's favorite color is blue, and he often wears blue shirts. "
-        "He enjoys cooking and often tries new recipes on weekends. "
-        "Brandon is a morning person and likes to start his day with a run in the park. "
-        "He is also a coffee enthusiast and enjoys trying different coffee blends. "
-        "Brandon is a member of a local book club and enjoys discussing books with fellow members. "
-        "He is also a fan of board games and often hosts game nights at his place. "
-        "Brandon is an advocate for environmental conservation and volunteers for local clean-up drives. "
-        "He is also a mentor for aspiring software developers and enjoys sharing his knowledge with others. "
-        "Brandon's favorite sport is basketball, and he often plays with his friends on weekends. "
-        "He is also a fan of the Golden State Warriors and enjoys watching their games. "
-    ) * 2  # Repeat to ensure it's 2k characters
-    file_path = Path(tmpdir.join("long_file.txt"))
-    with open(file_path, "w") as f:
-        f.write(content)
-
-    file_source = TextFileKnowledgeSource(
-        file_paths=[file_path], metadata={"preference": "personal"}
-    )
-    mock_vector_db.sources = [file_source]
-    mock_vector_db.query.return_value = [{"content": content, "score": 0.9}]
-    # Perform a query
-    query = "What is Brandon's favorite movie?"
-    results = mock_vector_db.query(query)
-
-    # Assert that the results contain the expected information
-    assert any("inception" in result["content"].lower() for result in results)
-    mock_vector_db.query.assert_called_once()
-
-
-def test_multiple_short_files(mock_vector_db, tmpdir):
-    # Create multiple short text files
-    contents = [
-        {
-            "content": "Brandon works as a software engineer.",
-            "metadata": {"category": "profession", "source": "occupation"},
-        },
-        {
-            "content": "Brandon lives in New York.",
-            "metadata": {"category": "city", "source": "personal"},
-        },
-        {
-            "content": "Brandon enjoys cooking Italian food.",
-            "metadata": {"category": "hobby", "source": "personal"},
-        },
-    ]
-    file_paths = []
-    for i, item in enumerate(contents):
-        file_path = Path(tmpdir.join(f"file_{i}.txt"))
-        with open(file_path, "w") as f:
-            f.write(item["content"])
-        file_paths.append((file_path, item["metadata"]))
-
-    file_sources = [
-        TextFileKnowledgeSource(file_paths=[path], metadata=metadata)
-        for path, metadata in file_paths
-    ]
-    mock_vector_db.sources = file_sources
-    mock_vector_db.query.return_value = [
-        {"content": "Brandon lives in New York.", "score": 0.9}
-    ]
-    # Perform a query
-    query = "What city does he reside in?"
-    results = mock_vector_db.query(query)
-    # Assert that the correct information is retrieved
-    assert any("new york" in result["content"].lower() for result in results)
-    mock_vector_db.query.assert_called_once()
-
-
-def test_multiple_2k_character_files(mock_vector_db, tmpdir):
-    # Create multiple 2k character text files with various facts about Brandon
-    contents = [
-        (
-            "Brandon loves traveling and has visited over 20 countries. "
-            "He is fluent in Spanish and often practices with his friends. "
-            "Brandon's favorite city is Barcelona, where he enjoys the architecture and culture. "
-            "He is a foodie and loves trying new cuisines, with a particular fondness for sushi. "
-            "Brandon is an avid cyclist and participates in local cycling events. "
-            "He is also a photographer and enjoys capturing landscapes and cityscapes. "
-            "Brandon is a tech enthusiast and follows the latest trends in gadgets and software. "
-            "He is also a fan of virtual reality and owns a VR headset. "
-            "Brandon's favorite book is 'The Hitchhiker's Guide to the Galaxy'. "
-            "He enjoys watching documentaries and learning about history and science. "
-            "Brandon is a coffee lover and has a collection of coffee mugs from different countries. "
-            "He is also a fan of jazz music and often attends live performances. "
-            "Brandon is a member of a local running club and participates in marathons. "
-            "He is also a volunteer at a local animal shelter and helps with dog walking. "
-            "Brandon's favorite holiday is Christmas, and he enjoys decorating his home. "
-            "He is also a fan of classic movies and has a collection of DVDs. "
-            "Brandon is a mentor for young professionals and enjoys giving career advice. "
-            "He is also a fan of puzzles and enjoys solving them in his free time. "
-            "Brandon's favorite sport is soccer, and he often plays with his friends. "
-            "He is also a fan of FC Barcelona and enjoys watching their matches. "
-        )
-        * 2,  # Repeat to ensure it's 2k characters
-        (
-            "Brandon is a software engineer who lives in San Francisco. "
-            "He enjoys hiking and often visits the trails in the Bay Area. "
-            "Brandon has a pet dog named Max, who is a golden retriever. "
-            "He loves reading science fiction books, and his favorite author is Isaac Asimov. "
-            "Brandon's favorite movie is Inception, and he enjoys watching it with his friends. "
-            "He is also a fan of Mexican cuisine, especially tacos and burritos. "
-            "Brandon plays the guitar and often performs at local open mic nights. "
-            "He is learning French and plans to visit Paris next year. "
-            "Brandon is passionate about technology and often attends tech meetups in the city. "
-            "He is also interested in AI and machine learning, and he is currently working on a project related to natural language processing. "
-            "Brandon's favorite color is blue, and he often wears blue shirts. "
-            "He enjoys cooking and often tries new recipes on weekends. "
-            "Brandon is a morning person and likes to start his day with a run in the park. "
-            "He is also a coffee enthusiast and enjoys trying different coffee blends. "
-            "Brandon is a member of a local book club and enjoys discussing books with fellow members. "
-            "He is also a fan of board games and often hosts game nights at his place. "
-            "Brandon is an advocate for environmental conservation and volunteers for local clean-up drives. "
-            "He is also a mentor for aspiring software developers and enjoys sharing his knowledge with others. "
-            "Brandon's favorite sport is basketball, and he often plays with his friends on weekends. "
-            "He is also a fan of the Golden State Warriors and enjoys watching their games. "
-        )
-        * 2,  # Repeat to ensure it's 2k characters
-    ]
-    file_paths = []
-    for i, content in enumerate(contents):
-        file_path = Path(tmpdir.join(f"long_file_{i}.txt"))
-        with open(file_path, "w") as f:
-            f.write(content)
-        file_paths.append(file_path)
-
-    file_sources = [
-        TextFileKnowledgeSource(file_paths=[path], metadata={"preference": "personal"})
-        for path in file_paths
-    ]
-    mock_vector_db.sources = file_sources
-    mock_vector_db.query.return_value = [
-        {
-            "content": "Brandon's favorite book is 'The Hitchhiker's Guide to the Galaxy'.",
-            "score": 0.9,
-        }
-    ]
-    # Perform a query
-    query = "What is Brandon's favorite book?"
-    results = mock_vector_db.query(query)
-
-    # Assert that the correct information is retrieved
-    assert any(
-        "the hitchhiker's guide to the galaxy" in result["content"].lower()
-        for result in results
-    )
-    mock_vector_db.query.assert_called_once()
-
-
-@pytest.mark.vcr(filter_headers=["authorization"])
-def test_hybrid_string_and_files(mock_vector_db, tmpdir):
-    # Create string sources
-    string_contents = [
-        "Brandon is learning French.",
-        "Brandon visited Paris last summer.",
-    ]
-    string_sources = [
-        StringKnowledgeSource(content=content, metadata={"preference": "personal"})
-        for content in string_contents
-    ]
-
-    # Create file sources
-    file_contents = [
-        "Brandon prefers tea over coffee.",
-        "Brandon's favorite book is 'The Alchemist'.",
-    ]
-    file_paths = []
-    for i, content in enumerate(file_contents):
-        file_path = Path(tmpdir.join(f"file_{i}.txt"))
-        with open(file_path, "w") as f:
-            f.write(content)
-        file_paths.append(file_path)
-
-    file_sources = [
-        TextFileKnowledgeSource(file_paths=[path], metadata={"preference": "personal"})
-        for path in file_paths
-    ]
-
-    # Combine string and file sources
-    mock_vector_db.sources = string_sources + file_sources
-    mock_vector_db.query.return_value = [{"content": file_contents[1], "score": 0.9}]
-
-    # Perform a query
-    query = "What is Brandon's favorite book?"
-    results = mock_vector_db.query(query)
-
-    # Assert that the correct information is retrieved
-    assert any("the alchemist" in result["content"].lower() for result in results)
-    mock_vector_db.query.assert_called_once()
-
-
-def test_pdf_knowledge_source(mock_vector_db):
-    # Get the directory of the current file
-    current_dir = Path(__file__).parent
-    # Construct the path to the PDF file
-    pdf_path = current_dir / "crewai_quickstart.pdf"
-
-    # Create a PDFKnowledgeSource
-    pdf_source = PDFKnowledgeSource(
-        file_paths=[pdf_path], metadata={"preference": "personal"}
-    )
-    mock_vector_db.sources = [pdf_source]
-    mock_vector_db.query.return_value = [
-        {"content": "crewai create crew latest-ai-development", "score": 0.9}
-    ]
-
-    # Perform a query
-    query = "How do you create a crew?"
-    results = mock_vector_db.query(query)
-
-    # Assert that the correct information is retrieved
-    assert any(
-        "crewai create crew latest-ai-development" in result["content"].lower()
-        for result in results
-    )
-    mock_vector_db.query.assert_called_once()
-
-
-@pytest.mark.vcr(filter_headers=["authorization"])
-def test_csv_knowledge_source(mock_vector_db, tmpdir):
-    """Test CSVKnowledgeSource with a simple CSV file."""
-
-    # Create a CSV file with sample data
-    csv_content = [
-        ["Name", "Age", "City"],
-        ["Brandon", "30", "New York"],
-        ["Alice", "25", "Los Angeles"],
-        ["Bob", "35", "Chicago"],
-    ]
-    csv_path = Path(tmpdir.join("data.csv"))
-    with open(csv_path, "w", encoding="utf-8") as f:
-        for row in csv_content:
-            f.write(",".join(row) + "\n")
-
-    # Create a CSVKnowledgeSource
-    csv_source = CSVKnowledgeSource(
-        file_paths=[csv_path], metadata={"preference": "personal"}
-    )
-    mock_vector_db.sources = [csv_source]
-    mock_vector_db.query.return_value = [
-        {"content": "Brandon is 30 years old.", "score": 0.9}
-    ]
-
-    # Perform a query
-    query = "How old is Brandon?"
-    results = mock_vector_db.query(query)
-
-    # Assert that the correct information is retrieved
-    assert any("30" in result["content"] for result in results)
-    mock_vector_db.query.assert_called_once()
-
-
-def test_json_knowledge_source(mock_vector_db, tmpdir):
-    """Test JSONKnowledgeSource with a simple JSON file."""
-
-    # Create a JSON file with sample data
-    json_data = {
-        "people": [
-            {"name": "Brandon", "age": 30, "city": "New York"},
-            {"name": "Alice", "age": 25, "city": "Los Angeles"},
-            {"name": "Bob", "age": 35, "city": "Chicago"},
-        ]
-    }
-    json_path = Path(tmpdir.join("data.json"))
-    with open(json_path, "w", encoding="utf-8") as f:
-        import json
-
-        json.dump(json_data, f)
-
-    # Create a JSONKnowledgeSource
-    json_source = JSONKnowledgeSource(
-        file_paths=[json_path], metadata={"preference": "personal"}
-    )
-    mock_vector_db.sources = [json_source]
-    mock_vector_db.query.return_value = [
-        {"content": "Alice lives in Los Angeles.", "score": 0.9}
-    ]
-
-    # Perform a query
-    query = "Where does Alice reside?"
-    results = mock_vector_db.query(query)
-
-    # Assert that the correct information is retrieved
-    assert any("los angeles" in result["content"].lower() for result in results)
-    mock_vector_db.query.assert_called_once()
-
-
-def test_excel_knowledge_source(mock_vector_db, tmpdir):
-    """Test ExcelKnowledgeSource with a simple Excel file."""
-
-    # Create an Excel file with sample data
-    import pandas as pd  # type: ignore[import-untyped]
-
-    excel_data = {
-        "Name": ["Brandon", "Alice", "Bob"],
-        "Age": [30, 25, 35],
-        "City": ["New York", "Los Angeles", "Chicago"],
-    }
-    df = pd.DataFrame(excel_data)
-    excel_path = Path(tmpdir.join("data.xlsx"))
-    df.to_excel(excel_path, index=False)
-
-    # Create an ExcelKnowledgeSource
-    excel_source = ExcelKnowledgeSource(
-        file_paths=[excel_path], metadata={"preference": "personal"}
-    )
-    mock_vector_db.sources = [excel_source]
-    mock_vector_db.query.return_value = [
-        {"content": "Brandon is 30 years old.", "score": 0.9}
-    ]
-
-    # Perform a query
-    query = "What is Brandon's age?"
-    results = mock_vector_db.query(query)
-
-    # Assert that the correct information is retrieved
-    assert any("30" in result["content"] for result in results)
-    mock_vector_db.query.assert_called_once()
-
-
-@pytest.mark.vcr
-def test_docling_source(mock_vector_db):
-    docling_source = CrewDoclingSource(
-        file_paths=[
-            "https://lilianweng.github.io/posts/2024-11-28-reward-hacking/",
-        ],
-    )
-    mock_vector_db.sources = [docling_source]
-    mock_vector_db.query.return_value = [
-        {
-            "content": "Reward hacking is a technique used to improve the performance of reinforcement learning agents.",
-            "score": 0.9,
-        }
-    ]
-    # Perform a query
-    query = "What is reward hacking?"
-    results = mock_vector_db.query(query)
-    assert any("reward hacking" in result["content"].lower() for result in results)
-    mock_vector_db.query.assert_called_once()
-
-
-@pytest.mark.vcr
-def test_multiple_docling_sources() -> None:
-    urls: list[Path | str] = [
-        "https://lilianweng.github.io/posts/2024-11-28-reward-hacking/",
-        "https://lilianweng.github.io/posts/2024-07-07-hallucination/",
-    ]
-    docling_source = CrewDoclingSource(file_paths=urls)
-
-    assert docling_source.file_paths == urls
-    assert docling_source.content is not None
-
-
-def test_file_path_validation():
-    """Test file path validation for knowledge sources."""
-    current_dir = Path(__file__).parent
-    pdf_path = current_dir / "crewai_quickstart.pdf"
-
-    # Test valid single file_path
-    source = PDFKnowledgeSource(file_path=pdf_path)
-    assert source.safe_file_paths == [pdf_path]
-
-    # Test valid file_paths list
-    source = PDFKnowledgeSource(file_paths=[pdf_path])
-    assert source.safe_file_paths == [pdf_path]
-
-    # Test both file_path and file_paths provided (should use file_paths)
-    source = PDFKnowledgeSource(file_path=pdf_path, file_paths=[pdf_path])
-    assert source.safe_file_paths == [pdf_path]
-
-    # Test neither file_path nor file_paths provided
-    with pytest.raises(
-        ValueError,
-        match="file_path/file_paths must be a Path, str, or a list of these types",
-    ):
-        PDFKnowledgeSource()
--- a/tests/knowledge/test_knowledge_searchresult.py
+++ b/tests/knowledge/test_knowledge_searchresult.py
@@ -1,191 +0,0 @@
-"""Tests for Knowledge SearchResult type conversion and integration."""
-
-from typing import Any
-from unittest.mock import MagicMock, patch
-
-import pytest
-
-from crewai.knowledge.knowledge import Knowledge  # type: ignore[import-untyped]
-from crewai.knowledge.source.string_knowledge_source import (  # type: ignore[import-untyped]
-    StringKnowledgeSource,
-)
-from crewai.knowledge.utils.knowledge_utils import (  # type: ignore[import-untyped]
-    extract_knowledge_context,
-)
-
-
-def test_knowledge_query_returns_searchresult() -> None:
-    """Test that Knowledge.query returns SearchResult format."""
-    with patch("crewai.knowledge.knowledge.KnowledgeStorage") as mock_storage_class:
-        mock_storage = MagicMock()
-        mock_storage_class.return_value = mock_storage
-        mock_storage.search.return_value = [
-            {
-                "content": "AI is fascinating",
-                "score": 0.9,
-                "metadata": {"source": "doc1"},
-            },
-            {
-                "content": "Machine learning rocks",
-                "score": 0.8,
-                "metadata": {"source": "doc2"},
-            },
-        ]
-
-        sources = [StringKnowledgeSource(content="Test knowledge content")]
-        knowledge = Knowledge(collection_name="test_collection", sources=sources)
-
-        results = knowledge.query(
-            ["AI technology"], results_limit=5, score_threshold=0.3
-        )
-
-        mock_storage.search.assert_called_once_with(
-            ["AI technology"], limit=5, score_threshold=0.3
-        )
-
-        assert isinstance(results, list)
-        assert len(results) == 2
-
-        for result in results:
-            assert isinstance(result, dict)
-            assert "content" in result
-            assert "score" in result
-            assert "metadata" in result
-
-        assert results[0]["content"] == "AI is fascinating"
-        assert results[0]["score"] == 0.9
-        assert results[1]["content"] == "Machine learning rocks"
-        assert results[1]["score"] == 0.8
-
-
-def test_knowledge_query_with_empty_results() -> None:
-    """Test Knowledge.query with empty search results."""
-    with patch("crewai.knowledge.knowledge.KnowledgeStorage") as mock_storage_class:
-        mock_storage = MagicMock()
-        mock_storage_class.return_value = mock_storage
-        mock_storage.search.return_value = []
-
-        sources = [StringKnowledgeSource(content="Test content")]
-        knowledge = Knowledge(collection_name="empty_test", sources=sources)
-
-        results = knowledge.query(["nonexistent query"])
-
-        assert isinstance(results, list)
-        assert len(results) == 0
-
-
-def test_extract_knowledge_context_with_searchresult() -> None:
-    """Test extract_knowledge_context works with SearchResult format."""
-    search_results = [
-        {"content": "Python is great for AI", "score": 0.95, "metadata": {}},
-        {"content": "Machine learning algorithms", "score": 0.88, "metadata": {}},
-        {"content": "Deep learning frameworks", "score": 0.82, "metadata": {}},
-    ]
-
-    context = extract_knowledge_context(search_results)
-
-    assert "Additional Information:" in context
-    assert "Python is great for AI" in context
-    assert "Machine learning algorithms" in context
-    assert "Deep learning frameworks" in context
-
-    expected_content = (
-        "Python is great for AI\nMachine learning algorithms\nDeep learning frameworks"
-    )
-    assert expected_content in context
-
-
-def test_extract_knowledge_context_with_empty_content() -> None:
-    """Test extract_knowledge_context handles empty or invalid content."""
-    search_results = [
-        {"content": "", "score": 0.5, "metadata": {}},
-        {"content": None, "score": 0.4, "metadata": {}},
-        {"score": 0.3, "metadata": {}},
-    ]
-
-    context = extract_knowledge_context(search_results)
-
-    assert context == ""
-
-
-def test_extract_knowledge_context_filters_invalid_results() -> None:
-    """Test that extract_knowledge_context filters out invalid results."""
-    search_results: list[dict[str, Any] | None] = [
-        {"content": "Valid content 1", "score": 0.9, "metadata": {}},
-        {"content": "", "score": 0.8, "metadata": {}},
-        {"content": "Valid content 2", "score": 0.7, "metadata": {}},
-        None,
-        {"content": None, "score": 0.6, "metadata": {}},
-    ]
-
-    context = extract_knowledge_context(search_results)
-
-    assert "Additional Information:" in context
-    assert "Valid content 1" in context
-    assert "Valid content 2" in context
-    assert context.count("\n") == 1
-
-
-@patch("crewai.rag.config.utils.get_rag_client")
-@patch("crewai.knowledge.storage.knowledge_storage.KnowledgeStorage")
-def test_knowledge_storage_exception_handling(
-    mock_storage_class: MagicMock, mock_get_client: MagicMock
-) -> None:
-    """Test Knowledge handles storage exceptions gracefully."""
-    mock_storage = MagicMock()
-    mock_storage_class.return_value = mock_storage
-    mock_storage.search.side_effect = Exception("Storage error")
-
-    sources = [StringKnowledgeSource(content="Test content")]
-    knowledge = Knowledge(collection_name="error_test", sources=sources)
-
-    with pytest.raises(ValueError, match="Storage is not initialized"):
-        knowledge.storage = None
-        knowledge.query(["test query"])
-
-
-def test_knowledge_add_sources_integration() -> None:
-    """Test Knowledge.add_sources integrates properly with storage."""
-    with patch("crewai.knowledge.knowledge.KnowledgeStorage") as mock_storage_class:
-        mock_storage = MagicMock()
-        mock_storage_class.return_value = mock_storage
-
-        sources = [
-            StringKnowledgeSource(content="Content 1"),
-            StringKnowledgeSource(content="Content 2"),
-        ]
-        knowledge = Knowledge(collection_name="add_sources_test", sources=sources)
-
-        knowledge.add_sources()
-
-        for source in sources:
-            assert source.storage == mock_storage
-
-
-def test_knowledge_reset_integration() -> None:
-    """Test Knowledge.reset integrates with storage."""
-    with patch("crewai.knowledge.knowledge.KnowledgeStorage") as mock_storage_class:
-        mock_storage = MagicMock()
-        mock_storage_class.return_value = mock_storage
-
-        sources = [StringKnowledgeSource(content="Test content")]
-        knowledge = Knowledge(collection_name="reset_test", sources=sources)
-
-        knowledge.reset()
-
-        mock_storage.reset.assert_called_once()
-
-
-@patch("crewai.rag.config.utils.get_rag_client")
-@patch("crewai.knowledge.storage.knowledge_storage.KnowledgeStorage")
-def test_knowledge_reset_without_storage(
-    mock_storage_class: MagicMock, mock_get_client: MagicMock
-) -> None:
-    """Test Knowledge.reset raises error when storage is None."""
-    sources = [StringKnowledgeSource(content="Test content")]
-    knowledge = Knowledge(collection_name="no_storage_test", sources=sources)
-
-    knowledge.storage = None
-
-    with pytest.raises(ValueError, match="Storage is not initialized"):
-        knowledge.reset()
--- a/tests/knowledge/test_knowledge_storage_integration.py
+++ b/tests/knowledge/test_knowledge_storage_integration.py
@@ -1,196 +0,0 @@
-"""Integration tests for KnowledgeStorage RAG client migration."""
-
-from unittest.mock import MagicMock, patch
-
-import pytest
-
-from crewai.knowledge.storage.knowledge_storage import (  # type: ignore[import-untyped]
-    KnowledgeStorage,
-)
-
-
-@patch("crewai.knowledge.storage.knowledge_storage.get_rag_client")
-@patch("crewai.knowledge.storage.knowledge_storage.create_client")
-@patch("crewai.knowledge.storage.knowledge_storage.build_embedder")
-def test_knowledge_storage_uses_rag_client(
-    mock_get_embedding: MagicMock,
-    mock_create_client: MagicMock,
-    mock_get_client: MagicMock,
-) -> None:
-    """Test that KnowledgeStorage properly integrates with RAG client."""
-    mock_client = MagicMock()
-    mock_create_client.return_value = mock_client
-    mock_get_client.return_value = mock_client
-    mock_client.search.return_value = [
-        {"content": "test content", "score": 0.9, "metadata": {"source": "test"}}
-    ]
-
-    embedder_config = {"provider": "openai", "model": "text-embedding-3-small"}
-    storage = KnowledgeStorage(
-        embedder=embedder_config, collection_name="test_knowledge"
-    )
-
-    mock_create_client.assert_called_once()
-
-    results = storage.search(["test query"], limit=5, score_threshold=0.3)
-
-    mock_get_client.assert_not_called()
-    mock_client.search.assert_called_once_with(
-        collection_name="knowledge_test_knowledge",
-        query="test query",
-        limit=5,
-        metadata_filter=None,
-        score_threshold=0.3,
-    )
-
-    assert isinstance(results, list)
-    assert len(results) == 1
-    assert isinstance(results[0], dict)
-    assert "content" in results[0]
-
-
-@patch("crewai.knowledge.storage.knowledge_storage.get_rag_client")
-def test_collection_name_prefixing(mock_get_client: MagicMock) -> None:
-    """Test that collection names are properly prefixed."""
-    mock_client = MagicMock()
-    mock_get_client.return_value = mock_client
-    mock_client.search.return_value = []
-
-    storage = KnowledgeStorage(collection_name="custom_knowledge")
-    storage.search(["test"], limit=1)
-
-    mock_client.search.assert_called_once()
-    call_kwargs = mock_client.search.call_args.kwargs
-    assert call_kwargs["collection_name"] == "knowledge_custom_knowledge"
-
-    mock_client.reset_mock()
-    storage_default = KnowledgeStorage()
-    storage_default.search(["test"], limit=1)
-
-    call_kwargs = mock_client.search.call_args.kwargs
-    assert call_kwargs["collection_name"] == "knowledge"
-
-
-@patch("crewai.knowledge.storage.knowledge_storage.get_rag_client")
-def test_save_documents_integration(mock_get_client: MagicMock) -> None:
-    """Test document saving through RAG client."""
-    mock_client = MagicMock()
-    mock_get_client.return_value = mock_client
-
-    storage = KnowledgeStorage(collection_name="test_docs")
-    documents = ["Document 1 content", "Document 2 content"]
-
-    storage.save(documents)
-
-    mock_client.get_or_create_collection.assert_called_once_with(
-        collection_name="knowledge_test_docs"
-    )
-    mock_client.add_documents.assert_called_once()
-
-    call_kwargs = mock_client.add_documents.call_args.kwargs
-    added_docs = call_kwargs["documents"]
-    assert len(added_docs) == 2
-    assert added_docs[0]["content"] == "Document 1 content"
-    assert added_docs[1]["content"] == "Document 2 content"
-
-
-@patch("crewai.knowledge.storage.knowledge_storage.get_rag_client")
-def test_reset_integration(mock_get_client: MagicMock) -> None:
-    """Test collection reset through RAG client."""
-    mock_client = MagicMock()
-    mock_get_client.return_value = mock_client
-
-    storage = KnowledgeStorage(collection_name="test_reset")
-    storage.reset()
-
-    mock_client.delete_collection.assert_called_once_with(
-        collection_name="knowledge_test_reset"
-    )
-
-
-@patch("crewai.knowledge.storage.knowledge_storage.get_rag_client")
-def test_search_error_handling(mock_get_client: MagicMock) -> None:
-    """Test error handling during search operations."""
-    mock_client = MagicMock()
-    mock_get_client.return_value = mock_client
-    mock_client.search.side_effect = Exception("RAG client error")
-
-    storage = KnowledgeStorage(collection_name="error_test")
-
-    results = storage.search(["test query"])
-    assert results == []
-
-
-@patch("crewai.knowledge.storage.knowledge_storage.get_rag_client")
-@patch("crewai.knowledge.storage.knowledge_storage.build_embedder")
-def test_embedding_configuration_flow(
-    mock_get_embedding: MagicMock, mock_get_client: MagicMock
-) -> None:
-    """Test that embedding configuration flows properly to RAG client."""
-    mock_embedding_func = MagicMock()
-    mock_get_embedding.return_value = mock_embedding_func
-    mock_get_client.return_value = MagicMock()
-
-    embedder_config = {
-        "provider": "sentence-transformer",
-        "model_name": "all-MiniLM-L6-v2",
-    }
-
-    KnowledgeStorage(embedder=embedder_config, collection_name="embedding_test")
-
-    mock_get_embedding.assert_called_once_with(embedder_config)
-
-
-@patch("crewai.knowledge.storage.knowledge_storage.get_rag_client")
-def test_query_list_conversion(mock_get_client: MagicMock) -> None:
-    """Test that query list is properly converted to string."""
-    mock_client = MagicMock()
-    mock_get_client.return_value = mock_client
-    mock_client.search.return_value = []
-
-    storage = KnowledgeStorage()
-
-    storage.search(["single query"])
-    call_kwargs = mock_client.search.call_args.kwargs
-    assert call_kwargs["query"] == "single query"
-
-    mock_client.reset_mock()
-    storage.search(["query one", "query two"])
-    call_kwargs = mock_client.search.call_args.kwargs
-    assert call_kwargs["query"] == "query one query two"
-
-
-@patch("crewai.knowledge.storage.knowledge_storage.get_rag_client")
-def test_metadata_filter_handling(mock_get_client: MagicMock) -> None:
-    """Test metadata filter parameter handling."""
-    mock_client = MagicMock()
-    mock_get_client.return_value = mock_client
-    mock_client.search.return_value = []
-
-    storage = KnowledgeStorage()
-
-    metadata_filter = {"category": "technical", "priority": "high"}
-    storage.search(["test"], metadata_filter=metadata_filter)
-
-    call_kwargs = mock_client.search.call_args.kwargs
-    assert call_kwargs["metadata_filter"] == metadata_filter
-
-    mock_client.reset_mock()
-    storage.search(["test"], metadata_filter=None)
-
-    call_kwargs = mock_client.search.call_args.kwargs
-    assert call_kwargs["metadata_filter"] is None
-
-
-@patch("crewai.knowledge.storage.knowledge_storage.get_rag_client")
-def test_dimension_mismatch_error_handling(mock_get_client: MagicMock) -> None:
-    """Test specific handling of dimension mismatch errors."""
-    mock_client = MagicMock()
-    mock_get_client.return_value = mock_client
-    mock_client.get_or_create_collection.return_value = None
-    mock_client.add_documents.side_effect = Exception("dimension mismatch detected")
-
-    storage = KnowledgeStorage(collection_name="dimension_test")
-
-    with pytest.raises(ValueError, match="Embedding dimension mismatch"):
-        storage.save(["test document"])