From 1cc9c981e402ab99b3082f151905f8fe175e218a Mon Sep 17 00:00:00 2001 From: Lorenze Jay Date: Mon, 27 Jan 2025 12:24:13 -0800 Subject: [PATCH] WIP: test check with prints --- src/crewai/agent.py | 4 +- src/crewai/agents/agent_builder/base_agent.py | 2 +- src/crewai/knowledge/knowledge.py | 29 ++++++++------ .../source/base_file_knowledge_source.py | 10 ++++- .../knowledge/source/base_knowledge_source.py | 2 + .../knowledge/storage/knowledge_storage.py | 20 +++++----- .../utilities/embedding_configurator.py | 5 ++- tests/agent_test.py | 15 ++++--- ...ith_knowledge_sources_works_with_copy.yaml | 40 +++++++++---------- 9 files changed, 76 insertions(+), 51 deletions(-) diff --git a/src/crewai/agent.py b/src/crewai/agent.py index 483be6f48..f153d5269 100644 --- a/src/crewai/agent.py +++ b/src/crewai/agent.py @@ -125,7 +125,7 @@ class Agent(BaseAgent): default="safe", description="Mode for code execution: 'safe' (using Docker) or 'unsafe' (direct execution).", ) - embedder_config: Optional[Dict[str, Any]] = Field( + embedder: Optional[Dict[str, Any]] = Field( default=None, description="Embedder configuration for the agent.", ) @@ -164,7 +164,7 @@ class Agent(BaseAgent): ): self._knowledge = Knowledge( sources=self.knowledge_sources, - embedder_config=self.embedder_config, + embedder=self.embedder, collection_name=knowledge_agent_name, storage=self.knowledge_storage or None, ) diff --git a/src/crewai/agents/agent_builder/base_agent.py b/src/crewai/agents/agent_builder/base_agent.py index c9c4368e1..2664e692a 100644 --- a/src/crewai/agents/agent_builder/base_agent.py +++ b/src/crewai/agents/agent_builder/base_agent.py @@ -266,7 +266,7 @@ class BaseAgent(ABC, BaseModel): "cache_handler", "llm", "knowledge_sources", - "_knowledge", + "formatting_errors", } # Copy llm diff --git a/src/crewai/knowledge/knowledge.py b/src/crewai/knowledge/knowledge.py index c964333c8..daa24df89 100644 --- a/src/crewai/knowledge/knowledge.py +++ b/src/crewai/knowledge/knowledge.py @@ -15,20 +15,20 @@ class Knowledge(BaseModel): Args: sources: List[BaseKnowledgeSource] = Field(default_factory=list) storage: Optional[KnowledgeStorage] = Field(default=None) - embedder_config: Optional[Dict[str, Any]] = None + embedder: Optional[Dict[str, Any]] = None """ sources: List[BaseKnowledgeSource] = Field(default_factory=list) model_config = ConfigDict(arbitrary_types_allowed=True) storage: Optional[KnowledgeStorage] = Field(default=None) - embedder_config: Optional[Dict[str, Any]] = None + embedder: Optional[Dict[str, Any]] = None collection_name: Optional[str] = None def __init__( self, collection_name: str, sources: List[BaseKnowledgeSource], - embedder_config: Optional[Dict[str, Any]] = None, + embedder: Optional[Dict[str, Any]] = None, storage: Optional[KnowledgeStorage] = None, **data, ): @@ -37,25 +37,25 @@ class Knowledge(BaseModel): self.storage = storage else: self.storage = KnowledgeStorage( - embedder_config=embedder_config, collection_name=collection_name + embedder=embedder, collection_name=collection_name ) self.sources = sources self.storage.initialize_knowledge_storage() - for source in sources: - source.storage = self.storage - source.add() + print("self.storage", self.storage) + + self._add_sources() def query(self, query: List[str], limit: int = 3) -> List[Dict[str, Any]]: """ Query across all knowledge sources to find the most relevant information. Returns the top_k most relevant chunks. - + Raises: ValueError: If storage is not initialized. """ if self.storage is None: raise ValueError("Storage is not initialized.") - + results = self.storage.search( query, limit, @@ -63,6 +63,11 @@ class Knowledge(BaseModel): return results def _add_sources(self): - for source in self.sources: - source.storage = self.storage - source.add() + try: + print("adding sources", self.storage) + for source in self.sources: + source.storage = self.storage + source.add() + except Exception as e: + print("Error adding sources", e) + raise e diff --git a/src/crewai/knowledge/source/base_file_knowledge_source.py b/src/crewai/knowledge/source/base_file_knowledge_source.py index ac345b6a6..59e14763c 100644 --- a/src/crewai/knowledge/source/base_file_knowledge_source.py +++ b/src/crewai/knowledge/source/base_file_knowledge_source.py @@ -29,12 +29,19 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC): def validate_file_path(cls, v, info): """Validate that at least one of file_path or file_paths is provided.""" # Single check if both are None, O(1) instead of nested conditions - if v is None and info.data.get("file_path" if info.field_name == "file_paths" else "file_paths") is None: + if ( + v is None + and info.data.get( + "file_path" if info.field_name == "file_paths" else "file_paths" + ) + is None + ): raise ValueError("Either file_path or file_paths must be provided") return v def model_post_init(self, _): """Post-initialization method to load content.""" + print("model_post_init") self.safe_file_paths = self._process_file_paths() self.validate_content() self.content = self.load_content() @@ -64,6 +71,7 @@ class BaseFileKnowledgeSource(BaseKnowledgeSource, ABC): def _save_documents(self): """Save the documents to the storage.""" if self.storage: + print("saving source documents to storage") self.storage.save(self.chunks) else: raise ValueError("No storage found to save documents.") diff --git a/src/crewai/knowledge/source/base_knowledge_source.py b/src/crewai/knowledge/source/base_knowledge_source.py index b558a4b9a..32b545779 100644 --- a/src/crewai/knowledge/source/base_knowledge_source.py +++ b/src/crewai/knowledge/source/base_knowledge_source.py @@ -46,7 +46,9 @@ class BaseKnowledgeSource(BaseModel, ABC): Save the documents to the storage. This method should be called after the chunks and embeddings are generated. """ + print("saving documents", self.storage) if self.storage: + print("storage found") self.storage.save(self.chunks) else: raise ValueError("No storage found to save documents.") diff --git a/src/crewai/knowledge/storage/knowledge_storage.py b/src/crewai/knowledge/storage/knowledge_storage.py index 4a70c5997..beeb10f67 100644 --- a/src/crewai/knowledge/storage/knowledge_storage.py +++ b/src/crewai/knowledge/storage/knowledge_storage.py @@ -48,11 +48,11 @@ class KnowledgeStorage(BaseKnowledgeStorage): def __init__( self, - embedder_config: Optional[Dict[str, Any]] = None, + embedder: Optional[Dict[str, Any]] = None, collection_name: Optional[str] = None, ): self.collection_name = collection_name - self._set_embedder_config(embedder_config) + self._set_embedder_config(embedder) def search( self, @@ -99,8 +99,9 @@ class KnowledgeStorage(BaseKnowledgeStorage): ) if self.app: self.collection = self.app.get_or_create_collection( - name=collection_name, embedding_function=self.embedder_config + name=collection_name, embedding_function=self.embedder ) + print("db initialized", self.collection) else: raise Exception("Vector Database Client not initialized") except Exception: @@ -187,17 +188,18 @@ class KnowledgeStorage(BaseKnowledgeStorage): api_key=os.getenv("OPENAI_API_KEY"), model_name="text-embedding-3-small" ) - def _set_embedder_config( - self, embedder_config: Optional[Dict[str, Any]] = None - ) -> None: + def _set_embedder_config(self, embedder: Optional[Dict[str, Any]] = None) -> None: """Set the embedding configuration for the knowledge storage. Args: embedder_config (Optional[Dict[str, Any]]): Configuration dictionary for the embedder. If None or empty, defaults to the default embedding function. """ - self.embedder_config = ( - EmbeddingConfigurator().configure_embedder(embedder_config) - if embedder_config + print("embedder", embedder) + self.embedder = ( + EmbeddingConfigurator().configure_embedder(embedder) + if embedder else self._create_default_embedding_function() ) + print("self.embedder", self.embedder) + print("type of self.embedder", type(self.embedder)) diff --git a/src/crewai/utilities/embedding_configurator.py b/src/crewai/utilities/embedding_configurator.py index 71965bf53..1f94403d5 100644 --- a/src/crewai/utilities/embedding_configurator.py +++ b/src/crewai/utilities/embedding_configurator.py @@ -43,7 +43,10 @@ class EmbeddingConfigurator: raise Exception( f"Unsupported embedding provider: {provider}, supported providers: {list(self.embedding_functions.keys())}" ) - + print( + "self.embedding_functions[provider](config, model_name)", + self.embedding_functions[provider](config, model_name), + ) return self.embedding_functions[provider](config, model_name) @staticmethod diff --git a/tests/agent_test.py b/tests/agent_test.py index a2ff06963..01179be3f 100644 --- a/tests/agent_test.py +++ b/tests/agent_test.py @@ -1618,17 +1618,22 @@ def test_agent_with_knowledge_sources_works_with_copy(): mock_knowledge_instance.query.return_value = [{"content": content}] mock_knowledge_instance.save.return_value = None mock_knowledge_instance.initialize_knowledge_storage.return_value = None + mock_knowledge_instance.collection_name = "test_collection" + # mock_knowledge_instance.embedder = { + # "provider": "openai", + # "config": {"model_name": "text-embedding-3-small", "api_key": "123"}, + # } agent = Agent( role="Information Agent", goal="Provide information based on knowledge sources", backstory="You have access to specific knowledge sources.", - llm=LLM(model="gpt-4o-mini", api_key="123"), + llm=LLM(model="gpt-4o-mini"), knowledge_sources=[string_source], - embedder_config={ - "provider": "openai", - "config": {"model_name": "text-embedding-3-small", "api_key": "123"}, - }, + # embedder={ + # "provider": "openai", + # "config": {"model_name": "text-embedding-3-small", "api_key": "123"}, + # }, ) # Actually call copy instead of mocking it diff --git a/tests/cassettes/test_agent_with_knowledge_sources_works_with_copy.yaml b/tests/cassettes/test_agent_with_knowledge_sources_works_with_copy.yaml index 56ebce96a..176be39c8 100644 --- a/tests/cassettes/test_agent_with_knowledge_sources_works_with_copy.yaml +++ b/tests/cassettes/test_agent_with_knowledge_sources_works_with_copy.yaml @@ -41,10 +41,8 @@ interactions: \ }\n ],\n \"model\": \"text-embedding-3-small\",\n \"usage\": {\n \"prompt_tokens\": 12,\n \"total_tokens\": 12\n }\n}\n" headers: - CF-Cache-Status: - - DYNAMIC CF-RAY: - - 908a48e51e29fa62-SJC + - 908b749c8cb41576-SJC Connection: - keep-alive Content-Encoding: @@ -52,14 +50,14 @@ interactions: Content-Type: - application/json Date: - - Mon, 27 Jan 2025 16:57:58 GMT + - Mon, 27 Jan 2025 20:22:34 GMT Server: - cloudflare Set-Cookie: - - __cf_bm=PnGR9W8IIvattKIp.MgRbS6OXN_OoVwF8NILhGbvwWU-1737997078-1.0.1.1-9exwtgs6yYb6fCPo95Vs352XeTUIN.qWcJzCkNYp5punfMhxyEoe5f9oJbvTxF0DKntUg6RDpzJpWg9_hT3RdA; - path=/; expires=Mon, 27-Jan-25 17:27:58 GMT; domain=.api.openai.com; HttpOnly; + - __cf_bm=NhRx2kcSiBEOhkZbWaKlY_pw46LGzb7BpUNF.ozrJrY-1738009354-1.0.1.1-naI_MYI5l4_BbeD3mwpu.Pi55FVDn3ImnfFjreNp0bbAvTuf8xOJY8HgxhE.W4XWbq247SbevyoE9aStMYq0ow; + path=/; expires=Mon, 27-Jan-25 20:52:34 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - - _cfuvid=7_7dz2Zqwfh8UnqUsLkZH.Oqpep1CNkZGOK89RB4q5A-1737997078040-0.0.1.1-604800000; + - _cfuvid=xnfGIFZVE6LqgVkRMk6ORXsMurOmTu.z7TTz7afn810-1738009354083-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None Transfer-Encoding: - chunked @@ -71,20 +69,22 @@ interactions: - X-Request-ID alt-svc: - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC openai-model: - text-embedding-3-small openai-organization: - crewai-iuxna1 openai-processing-ms: - - '178' + - '75' openai-version: - '2020-10-01' strict-transport-security: - max-age=31536000; includeSubDomains; preload via: - - envoy-router-6868496db6-8f7sk + - envoy-router-75f99bb574-mb9tb x-envoy-upstream-service-time: - - '126' + - '29' x-ratelimit-limit-requests: - '10000' x-ratelimit-limit-tokens: @@ -98,7 +98,7 @@ interactions: x-ratelimit-reset-tokens: - 0s x-request-id: - - req_9f0e081d4c6f1a03fa8cbe266c95685a + - req_4e3d0c147826a183e2848ca1df2c9da9 http_version: HTTP/1.1 status_code: 200 - request: @@ -146,7 +146,7 @@ interactions: CF-Cache-Status: - DYNAMIC CF-RAY: - - 908a48eaac437ac7-SJC + - 908b749fcdbaed36-SJC Connection: - keep-alive Content-Encoding: @@ -154,14 +154,14 @@ interactions: Content-Type: - application/json Date: - - Mon, 27 Jan 2025 16:57:58 GMT + - Mon, 27 Jan 2025 20:22:34 GMT Server: - cloudflare Set-Cookie: - - __cf_bm=ewnMVB82RxYrCuvVlXzYeFQSi1DJU2l8YaKrywP1MGM-1737997078-1.0.1.1-.UdOh2dv0U8M4y49yXJHPAQr2EKtKOi6cuQlwLUzhlKvGy7WY7WFsEg43UInPqtTfLtGtqnFWZGuLjwhbFlPUw; - path=/; expires=Mon, 27-Jan-25 17:27:58 GMT; domain=.api.openai.com; HttpOnly; + - __cf_bm=hTW9TNu3pB35yAIOgg3sdy1hLtP_un1Js4.ZfsmNEXY-1738009354-1.0.1.1-pmAOhPxdO75O.Xt22Tnz_8pitmTMJY.vDeWPxXlJq3TTay0D.285FuCezcz8iy6gLi0hF9SRUc5f5xovdsaQOA; + path=/; expires=Mon, 27-Jan-25 20:52:34 GMT; domain=.api.openai.com; HttpOnly; Secure; SameSite=None - - _cfuvid=fVidGpnlMSfhxdaGcP5YjF.u3MkGlc9LyRisqFGHQHg-1737997078492-0.0.1.1-604800000; + - _cfuvid=KXf4AO65W0FpWKL_jL5Tw4Xdts32F1mkwYcniiqUZtU-1738009354603-0.0.1.1-604800000; path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None Transfer-Encoding: - chunked @@ -178,15 +178,15 @@ interactions: openai-organization: - crewai-iuxna1 openai-processing-ms: - - '56' + - '113' openai-version: - '2020-10-01' strict-transport-security: - max-age=31536000; includeSubDomains; preload via: - - envoy-router-5cc9fb545f-jkfdr + - envoy-router-5cc9fb545f-x4k6f x-envoy-upstream-service-time: - - '35' + - '74' x-ratelimit-limit-requests: - '10000' x-ratelimit-limit-tokens: @@ -200,7 +200,7 @@ interactions: x-ratelimit-reset-tokens: - 0s x-request-id: - - req_6f75f8d53cf83149ed93bfee2f0a740f + - req_7b9c56b5c3be975b8ce088f3457a52f9 http_version: HTTP/1.1 status_code: 200 version: 1