From 70910dd7b46de0d99ddf96670d6a3e75a01ca29e Mon Sep 17 00:00:00 2001
From: Lorenze Jay <lorenzejaytech@gmail.com>
Date: Tue, 19 Nov 2024 09:41:33 -0800
Subject: [PATCH] Fix knowledge tests: read result["context"], reset storage per test

---
 src/crewai/knowledge/knowledge.py |   6 +-
 tests/knowledge/knowledge_test.py | 132 +++++++++++++++++++++---------
 2 files changed, 98 insertions(+), 40 deletions(-)

diff --git a/src/crewai/knowledge/knowledge.py b/src/crewai/knowledge/knowledge.py
index e4e2c3c99..5707bc8ad 100644
--- a/src/crewai/knowledge/knowledge.py
+++ b/src/crewai/knowledge/knowledge.py
@@ -27,8 +27,10 @@ class Knowledge(BaseModel):
             for source in self.sources:
                 source.add()
         except Exception as e:
-            Logger.log(
-                "warning", f"Failed to add some sources during initialization: {e}"
+            Logger(verbose=True).log(
+                "warning",
+                f"Failed to init knowledge: {e}",
+                color="red",
             )
 
     def query(
diff --git a/tests/knowledge/knowledge_test.py b/tests/knowledge/knowledge_test.py
index 806d765e5..9554ae9c7 100644
--- a/tests/knowledge/knowledge_test.py
+++ b/tests/knowledge/knowledge_test.py
@@ -1,5 +1,6 @@
 """Test Knowledge creation and querying functionality."""
 
+import logging
 from pathlib import Path
 
 from crewai.knowledge.knowledge import Knowledge
@@ -10,19 +11,32 @@ from crewai.knowledge.source.pdf_knowledge_source import PDFKnowledgeSource
 from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
 from crewai.knowledge.source.text_file_knowledge_source import TextFileKnowledgeSource
 
+import pytest
+
+
+@pytest.fixture(autouse=True)
+def reset_knowledge_storage():
+    """Fixture to reset knowledge storage before each test."""
+    Knowledge().storage.reset()
+    yield
+
 
 def test_single_short_string():
+    logging.basicConfig(level=logging.INFO)
+
     # Create a knowledge base with a single short string
     content = "Brandon's favorite color is blue and he likes Mexican food."
-    string_source = StringKnowledgeSource(content=content)
+    string_source = StringKnowledgeSource(
+        content=content, metadata={"preference": "personal"}
+    )
     knowledge_base = Knowledge(sources=[string_source])
 
     # Perform a query
     query = "What is Brandon's favorite color?"
     results = knowledge_base.query(query)
 
-    # Assert that the results contain the expected information
-    assert any("blue" in result.lower() for result in results)
+    # Assert that the results contain the expected information
+    assert any("blue" in result["context"].lower() for result in results)
 
 
 def test_single_2k_character_string():
@@ -49,7 +63,9 @@ def test_single_2k_character_string():
         "Brandon's favorite sport is basketball, and he often plays with his friends on weekends. "
         "He is also a fan of the Golden State Warriors and enjoys watching their games. "
     )
-    string_source = StringKnowledgeSource(content=content)
+    string_source = StringKnowledgeSource(
+        content=content, metadata={"preference": "personal"}
+    )
     knowledge_base = Knowledge(sources=[string_source])
 
     # Perform a query
@@ -57,7 +73,7 @@ def test_single_2k_character_string():
     results = knowledge_base.query(query)
 
     # Assert that the results contain the expected information
-    assert any("inception" in result.lower() for result in results)
+    assert any("inception" in result["context"].lower() for result in results)
 
 
 def test_multiple_short_strings():
@@ -67,7 +83,10 @@ def test_multiple_short_strings():
         "Brandon has a dog named Max.",
         "Brandon enjoys painting landscapes.",
     ]
-    string_sources = [StringKnowledgeSource(content=content) for content in contents]
+    string_sources = [
+        StringKnowledgeSource(content=content, metadata={"preference": "personal"})
+        for content in contents
+    ]
     knowledge_base = Knowledge(sources=string_sources)
 
     # Perform a query
@@ -75,7 +94,7 @@ def test_multiple_short_strings():
     results = knowledge_base.query(query)
 
     # Assert that the correct information is retrieved
-    assert any("max" in result.lower() for result in results)
+    assert any("max" in result["context"].lower() for result in results)
 
 
 def test_multiple_2k_character_strings():
@@ -128,7 +147,13 @@ def test_multiple_2k_character_strings():
         )
         * 2,  # Repeat to ensure it's 2k characters
     ]
-    string_sources = [StringKnowledgeSource(content=content) for content in contents]
+    string_sources = [
+        StringKnowledgeSource(content=content, metadata={"preference": "personal"})
+        for content in contents
+    ]
+    # Storage is already reset before every test by the autouse
+    # reset_knowledge_storage fixture, so no explicit reset is needed here.
+
     knowledge_base = Knowledge(sources=string_sources)
 
     # Perform a query
@@ -137,7 +162,8 @@ def test_multiple_2k_character_strings():
 
     # Assert that the correct information is retrieved
     assert any(
-        "the hitchhiker's guide to the galaxy" in result.lower() for result in results
+        "the hitchhiker's guide to the galaxy" in result["context"].lower()
+        for result in results
     )
 
 
@@ -148,7 +174,9 @@ def test_single_short_file(tmpdir):
     with open(file_path, "w") as f:
         f.write(content)
 
-    file_source = TextFileKnowledgeSource(file_path=file_path)
+    file_source = TextFileKnowledgeSource(
+        file_path=file_path, metadata={"preference": "personal"}
+    )
     knowledge_base = Knowledge(sources=[file_source])
 
     # Perform a query
@@ -156,7 +184,7 @@ def test_single_short_file(tmpdir):
     results = knowledge_base.query(query)
 
     # Assert that the results contain the expected information
-    assert any("basketball" in result.lower() for result in results)
+    assert any("basketball" in result["context"].lower() for result in results)
 
 
 def test_single_2k_character_file(tmpdir):
@@ -187,7 +215,9 @@ def test_single_2k_character_file(tmpdir):
     with open(file_path, "w") as f:
         f.write(content)
 
-    file_source = TextFileKnowledgeSource(file_path=file_path)
+    file_source = TextFileKnowledgeSource(
+        file_path=file_path, metadata={"preference": "personal"}
+    )
     knowledge_base = Knowledge(sources=[file_source])
 
     # Perform a query
@@ -195,32 +225,43 @@ def test_single_2k_character_file(tmpdir):
     results = knowledge_base.query(query)
 
     # Assert that the results contain the expected information
-    assert any("inception" in result.lower() for result in results)
+    assert any("inception" in result["context"].lower() for result in results)
 
 
 def test_multiple_short_files(tmpdir):
     # Create multiple short text files
     contents = [
-        "Brandon lives in New York.",
-        "Brandon works as a software engineer.",
-        "Brandon enjoys cooking Italian food.",
+        {
+            "content": "Brandon works as a software engineer.",
+            "metadata": {"category": "profession", "source": "occupation"},
+        },
+        {
+            "content": "Brandon lives in New York.",
+            "metadata": {"category": "city", "source": "personal"},
+        },
+        {
+            "content": "Brandon enjoys cooking Italian food.",
+            "metadata": {"category": "hobby", "source": "personal"},
+        },
     ]
     file_paths = []
-    for i, content in enumerate(contents):
+    for i, item in enumerate(contents):
         file_path = Path(tmpdir.join(f"file_{i}.txt"))
         with open(file_path, "w") as f:
-            f.write(content)
-        file_paths.append(file_path)
+            f.write(item["content"])
+        file_paths.append((file_path, item["metadata"]))
 
-    file_sources = [TextFileKnowledgeSource(file_path=path) for path in file_paths]
+    file_sources = [
+        TextFileKnowledgeSource(file_path=path, metadata=metadata)
+        for path, metadata in file_paths
+    ]
     knowledge_base = Knowledge(sources=file_sources)
 
     # Perform a query
-    query = "Where does Brandon live?"
+    query = "What city does he reside in?"
     results = knowledge_base.query(query)
-
     # Assert that the correct information is retrieved
-    assert any("new york" in result.lower() for result in results)
+    assert any("new york" in result["context"].lower() for result in results)
 
 
 def test_multiple_2k_character_files(tmpdir):
@@ -280,7 +321,10 @@ def test_multiple_2k_character_files(tmpdir):
             f.write(content)
         file_paths.append(file_path)
 
-    file_sources = [TextFileKnowledgeSource(file_path=path) for path in file_paths]
+    file_sources = [
+        TextFileKnowledgeSource(file_path=path, metadata={"preference": "personal"})
+        for path in file_paths
+    ]
     knowledge_base = Knowledge(sources=file_sources)
 
     # Perform a query
@@ -289,7 +333,8 @@ def test_multiple_2k_character_files(tmpdir):
 
     # Assert that the correct information is retrieved
     assert any(
-        "the hitchhiker's guide to the galaxy" in result.lower() for result in results
+        "the hitchhiker's guide to the galaxy" in result["context"].lower()
+        for result in results
     )
 
 
@@ -300,7 +345,8 @@ def test_hybrid_string_and_files(tmpdir):
         "Brandon visited Paris last summer.",
     ]
     string_sources = [
-        StringKnowledgeSource(content=content) for content in string_contents
+        StringKnowledgeSource(content=content, metadata={"preference": "personal"})
+        for content in string_contents
     ]
 
     # Create file sources
@@ -315,7 +361,10 @@ def test_hybrid_string_and_files(tmpdir):
             f.write(content)
         file_paths.append(file_path)
 
-    file_sources = [TextFileKnowledgeSource(file_path=path) for path in file_paths]
+    file_sources = [
+        TextFileKnowledgeSource(file_path=path, metadata={"preference": "personal"})
+        for path in file_paths
+    ]
 
     # Combine string and file sources
     knowledge_base = Knowledge(sources=string_sources + file_sources)
@@ -325,7 +374,7 @@ def test_hybrid_string_and_files(tmpdir):
     results = knowledge_base.query(query)
 
     # Assert that the correct information is retrieved
-    assert any("the alchemist" in result.lower() for result in results)
+    assert any("the alchemist" in result["context"].lower() for result in results)
 
 
 def test_pdf_knowledge_source():
@@ -335,17 +384,18 @@ def test_pdf_knowledge_source():
     pdf_path = current_dir / "crewai_quickstart.pdf"
 
     # Create a PDFKnowledgeSource
-    pdf_source = PDFKnowledgeSource(file_path=pdf_path)
+    pdf_source = PDFKnowledgeSource(
+        file_path=pdf_path, metadata={"preference": "personal"}
+    )
     knowledge_base = Knowledge(sources=[pdf_source])
 
     # Perform a query
     query = "How do you create a crew?"
     results = knowledge_base.query(query)
 
-    print("Results from querying PDFKnowledgeSource:", results)
     # Assert that the correct information is retrieved
     assert any(
-        "crewai create crew latest-ai-development" in result.lower()
+        "crewai create crew latest-ai-development" in result["context"].lower()
         for result in results
     )
 
@@ -366,7 +416,9 @@ def test_csv_knowledge_source(tmpdir):
             f.write(",".join(row) + "\n")
 
     # Create a CSVKnowledgeSource
-    csv_source = CSVKnowledgeSource(file_path=csv_path)
+    csv_source = CSVKnowledgeSource(
+        file_path=csv_path, metadata={"preference": "personal"}
+    )
     knowledge_base = Knowledge(sources=[csv_source])
 
     # Perform a query
@@ -374,7 +426,7 @@ def test_csv_knowledge_source(tmpdir):
     results = knowledge_base.query(query)
 
     # Assert that the correct information is retrieved
-    assert any("30" in result for result in results)
+    assert any("30" in result["context"] for result in results)
 
 
 def test_json_knowledge_source(tmpdir):
@@ -395,15 +447,17 @@ def test_json_knowledge_source(tmpdir):
         json.dump(json_data, f)
 
     # Create a JSONKnowledgeSource
-    json_source = JSONKnowledgeSource(file_path=json_path)
+    json_source = JSONKnowledgeSource(
+        file_path=json_path, metadata={"preference": "personal"}
+    )
     knowledge_base = Knowledge(sources=[json_source])
 
     # Perform a query
-    query = "Where does Brandon live?"
+    query = "Where does Alice reside?"
     results = knowledge_base.query(query)
 
     # Assert that the correct information is retrieved
-    assert any("New York" in result for result in results)
+    assert any("los angeles" in result["context"].lower() for result in results)
 
 
 def test_excel_knowledge_source(tmpdir):
@@ -422,7 +476,9 @@ def test_excel_knowledge_source(tmpdir):
     df.to_excel(excel_path, index=False)
 
     # Create an ExcelKnowledgeSource
-    excel_source = ExcelKnowledgeSource(file_path=excel_path)
+    excel_source = ExcelKnowledgeSource(
+        file_path=excel_path, metadata={"preference": "personal"}
+    )
     knowledge_base = Knowledge(sources=[excel_source])
 
     # Perform a query
@@ -430,4 +486,4 @@ def test_excel_knowledge_source(tmpdir):
     results = knowledge_base.query(query)
 
     # Assert that the correct information is retrieved
-    assert any("30" in result for result in results)
+    assert any("30" in result["context"] for result in results)