Update default knowledge embedder from FastEmbed to OpenAI text-embedding-3-small

2026-01-09 08:08:32 +00:00 · 2024-11-19 10:43:06 -08:00
parent cbfdbe3b68
commit e882725b8a
3 changed files with 136 additions and 6 deletions
--- a/src/crewai/knowledge/storage/knowledge_storage.py
+++ b/src/crewai/knowledge/storage/knowledge_storage.py
@@ -3,7 +3,7 @@ import contextlib
 import io
 import logging
 import chromadb
-
+import os
 from crewai.utilities.paths import db_storage_path
 from typing import Optional, List
 from typing import Dict, Any
@@ -104,9 +104,13 @@ class KnowledgeStorage(BaseKnowledgeStorage):
            raise Exception("Collection not initialized")

    def _create_default_embedding_function(self):
-        from crewai.knowledge.embedder.fastembed import FastEmbed
+        from chromadb.utils.embedding_functions.openai_embedding_function import (
+            OpenAIEmbeddingFunction,
+        )

-        return FastEmbed().embed_texts
+        return OpenAIEmbeddingFunction(
+            api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small"
+        )

    def _set_embedder_config(
        self, embedder_config: Optional[Dict[str, Any]] = None
--- a/tests/agent_test.py
+++ b/tests/agent_test.py
@@ -18,6 +18,7 @@ from crewai.tools.tool_usage import ToolUsage
 from crewai.tools.tool_usage_events import ToolUsageFinished
 from crewai.utilities import RPMController
 from crewai.utilities.events import Emitter
+from crewai.knowledge.knowledge import Knowledge


 def test_agent_llm_creation_with_env_vars():
@@ -1577,19 +1578,28 @@ def test_agent_execute_task_with_ollama():
    assert "AI" in result or "artificial intelligence" in result.lower()


+@pytest.fixture(autouse=True)
+def reset_knowledge_storage():
+    """Fixture to reset knowledge storage before each test."""
+    Knowledge().storage.reset()
+    yield
+
+
@pytest.mark.vcr(filter_headers=["authorization"])
 def test_agent_with_knowledge_sources():
    # Create a knowledge source with some content
    content = "Brandon's favorite color is blue and he likes Mexican food."
-    string_source = StringKnowledgeSource(content=content)
+    string_source = StringKnowledgeSource(
+        content=content, metadata={"preference": "personal"}
+    )
+    Knowledge(sources=[string_source])

    # Create an agent with the knowledge source
    agent = Agent(
        role="Information Agent",
        goal="Provide information based on knowledge sources",
        backstory="You have access to specific knowledge sources.",
-        llm=LLM(model="gpt-3.5-turbo"),
-        knowledge_sources=[string_source],
+        llm=LLM(model="gpt-4o-mini"),
    )

    # Create a task that requires the agent to use the knowledge
--- a/tests/cassettes/test_agent_with_knowledge_sources.yaml
+++ b/tests/cassettes/test_agent_with_knowledge_sources.yaml
@@ -0,0 +1,116 @@
+interactions:
+- request:
+    body: '{"messages": [{"role": "system", "content": "You are Information Agent.
+      You have access to specific knowledge sources.\nYour personal goal is: Provide
+      information based on knowledge sources\nTo give my best complete final answer
+      to the task use the exact following format:\n\nThought: I now can give a great
+      answer\nFinal Answer: Your final answer must be the great and the most complete
+      as possible, it must be outcome described.\n\nI MUST use these formats, my job
+      depends on it!"}, {"role": "user", "content": "\nCurrent Task: What is Brandon''s
+      favorite color?\n\nThis is the expect criteria for your final answer: Brandon''s
+      favorite color.\nyou MUST return the actual complete content as the final answer,
+      not a summary.\n\nBegin! This is VERY important to you, use the tools available
+      and give your best Final Answer, your job depends on it!\n\nThought:"}], "model":
+      "gpt-4o-mini", "stop": ["\nObservation:"], "stream": false}'
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      connection:
+      - keep-alive
+      content-length:
+      - '931'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.52.1
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.52.1
+      x-stainless-raw-response:
+      - 'true'
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.5
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    content: "{\n  \"id\": \"chatcmpl-AVNRSfYiY3uAvBKIE5yPlxcmHF8hg\",\n  \"object\":
+      \"chat.completion\",\n  \"created\": 1732041642,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+      \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+      \"assistant\",\n        \"content\": \"I now can give a great answer  \\nFinal
+      Answer: Brandon's favorite color is blue.\",\n        \"refusal\": null\n      },\n
+      \     \"logprobs\": null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n
+      \ \"usage\": {\n    \"prompt_tokens\": 167,\n    \"completion_tokens\": 18,\n
+      \   \"total_tokens\": 185,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+      0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\": {\n
+      \     \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+      0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"system_fingerprint\":
+      \"fp_0705bf87c0\"\n}\n"
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-RAY:
+      - 8e5254865d47680c-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Tue, 19 Nov 2024 18:40:42 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - __cf_bm=WAd8fO8QSWocATajhCTglnUAXkZag7ld.Vvm.Ams__4-1732041642-1.0.1.1-nmO.TcMZYj2VLQlxzUEcktcauU.oPPtPX6qR9sxIzHzK2w9lBIlrTxTMe5_VwCe1bGWpX.6U02mrLHPf3N6UBw;
+        path=/; expires=Tue, 19-Nov-24 19:10:42 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=lNGPBYkRBbMGmmOXUkbBEMiRUIkqgUvHTFPPUTgpFY8-1732041642465-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - crewai-iuxna1
+      openai-processing-ms:
+      - '433'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999790'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_1db7656143a6fe8ae6334797387b1172
+    http_version: HTTP/1.1
+    status_code: 200
+version: 1