diff --git a/lib/crewai-tools/src/crewai_tools/adapters/lancedb_adapter.py b/lib/crewai-tools/src/crewai_tools/adapters/lancedb_adapter.py index 3fd8d8e2c..e90088d65 100644 --- a/lib/crewai-tools/src/crewai_tools/adapters/lancedb_adapter.py +++ b/lib/crewai-tools/src/crewai_tools/adapters/lancedb_adapter.py @@ -14,6 +14,22 @@ from crewai_tools.tools.rag.rag_tool import Adapter def _default_embedding_function(): + """Create a default embedding function using OpenAI's text-embedding-ada-002 model. + + This function creates and returns an embedding function that uses OpenAI's API + to generate embeddings for text inputs. The embedding function is used by the + LanceDBAdapter to convert text queries into vector representations for similarity search. + + Returns: + Callable: A function that takes a list of strings and returns their embeddings + as a list of vectors. + + Example: + >>> embed_fn = _default_embedding_function() + >>> embeddings = embed_fn(["Hello world"]) + >>> len(embeddings[0]) # Vector dimension + 1536 + """ client = OpenAIClient() def _embedding_function(input): @@ -24,6 +40,32 @@ def _default_embedding_function(): class LanceDBAdapter(Adapter): + """Adapter for integrating LanceDB vector database with CrewAI RAG tools. + + LanceDBAdapter provides a bridge between CrewAI's RAG (Retrieval-Augmented Generation) + system and LanceDB, enabling efficient vector similarity search for knowledge retrieval. + It handles embedding generation, vector search, and data ingestion with precise control + over query parameters and column mappings. + + Attributes: + uri: Database connection URI or path to the LanceDB database. + table_name: Name of the table to query within the LanceDB database. + embedding_function: Function to convert text into embeddings. Defaults to OpenAI's + text-embedding-ada-002 model. + top_k: Number of top results to return from similarity search. Defaults to 3. + vector_column_name: Name of the column containing vector embeddings. 
Defaults to "vector". + text_column_name: Name of the column containing text content. Defaults to "text". + + Example: + >>> from crewai_tools.adapters.lancedb_adapter import LanceDBAdapter + >>> adapter = LanceDBAdapter( + ... uri="./my_lancedb", + ... table_name="documents", + ... top_k=5 + ... ) + >>> results = adapter.query("What is machine learning?") + >>> print(results) + """ uri: str | Path table_name: str embedding_function: Callable = Field(default_factory=_default_embedding_function) @@ -35,12 +77,44 @@ class LanceDBAdapter(Adapter): _table: LanceDBTable = PrivateAttr() def model_post_init(self, __context: Any) -> None: + """Initialize the database connection and table after model instantiation. + + This method is automatically called after the Pydantic model is initialized. + It establishes the connection to the LanceDB database and opens the specified + table for querying and data operations. + + Args: + __context: Pydantic context object passed during initialization. + + Raises: + Exception: Propagated unchanged from LanceDB when connecting to ``uri`` or opening ``table_name`` fails (for example, the table does not exist). + """ self._db = lancedb_connect(self.uri) self._table = self._db.open_table(self.table_name) super().model_post_init(__context) def query(self, question: str) -> str: # type: ignore[override] + """Perform a vector similarity search for the given question. + + This method converts the input question into an embedding vector and searches + the LanceDB table for the most similar entries. It returns the top-k results + based on vector similarity, searching the column named by ``vector_column_name``. + + Args: + question: The text query to search for in the vector database. + + Returns: + A string containing the concatenated text results from the top-k most + similar entries, separated by newlines. + + Example: + >>> adapter = LanceDBAdapter(uri="./db", table_name="docs") + >>> results = adapter.query("What is CrewAI?") + >>> print(results) + CrewAI is a framework for orchestrating AI agents...
+ CrewAI provides precise control over agent workflows... + """ query = self.embedding_function([question])[0] results = ( self._table.search(query, vector_column_name=self.vector_column_name) @@ -56,4 +130,23 @@ class LanceDBAdapter(Adapter): *args: Any, **kwargs: Any, ) -> None: + """Add data to the LanceDB table. + + This method provides a direct interface to add new records to the underlying + LanceDB table. It accepts the same arguments as the LanceDB table's add method, + allowing flexible data ingestion for building knowledge bases. + + Args: + *args: Positional arguments to pass to the LanceDB table's add method. + **kwargs: Keyword arguments to pass to the LanceDB table's add method. + Common kwargs include 'data' (list of records) and 'mode' (append/overwrite). + + Example: + >>> adapter = LanceDBAdapter(uri="./db", table_name="docs") + >>> data = [ + ... {"text": "CrewAI enables agent collaboration", "vector": [0.1, 0.2, ...]}, + ... {"text": "LanceDB provides vector storage", "vector": [0.3, 0.4, ...]} + ... 
] + >>> adapter.add(data=data) + """ self._table.add(*args, **kwargs) diff --git a/lib/crewai-tools/tests/adapters/test_lancedb_adapter_docs.py b/lib/crewai-tools/tests/adapters/test_lancedb_adapter_docs.py new file mode 100644 index 000000000..ce37ad3be --- /dev/null +++ b/lib/crewai-tools/tests/adapters/test_lancedb_adapter_docs.py @@ -0,0 +1,62 @@ +"""Test that LanceDB adapter has proper docstrings.""" + +import inspect + +import pytest + +lancedb = pytest.importorskip("lancedb") + +from crewai_tools.adapters.lancedb_adapter import ( + LanceDBAdapter, + _default_embedding_function, +) + + +def test_lancedb_adapter_class_has_docstring(): + """Verify that LanceDBAdapter class has a docstring.""" + assert LanceDBAdapter.__doc__ is not None, "LanceDBAdapter class is missing a docstring" + assert len(LanceDBAdapter.__doc__.strip()) > 0, "LanceDBAdapter docstring is empty" + + +def test_lancedb_adapter_model_post_init_has_docstring(): + """Verify that model_post_init method has a docstring.""" + assert ( + LanceDBAdapter.model_post_init.__doc__ is not None + ), "model_post_init method is missing a docstring" + assert ( + len(LanceDBAdapter.model_post_init.__doc__.strip()) > 0 + ), "model_post_init docstring is empty" + + +def test_lancedb_adapter_query_has_docstring(): + """Verify that query method has a docstring.""" + assert LanceDBAdapter.query.__doc__ is not None, "query method is missing a docstring" + assert len(LanceDBAdapter.query.__doc__.strip()) > 0, "query docstring is empty" + + +def test_lancedb_adapter_add_has_docstring(): + """Verify that add method has a docstring.""" + assert LanceDBAdapter.add.__doc__ is not None, "add method is missing a docstring" + assert len(LanceDBAdapter.add.__doc__.strip()) > 0, "add docstring is empty" + + +def test_default_embedding_function_has_docstring(): + """Verify that _default_embedding_function has a docstring.""" + assert ( + _default_embedding_function.__doc__ is not None + ), "_default_embedding_function is 
missing a docstring" + assert ( + len(_default_embedding_function.__doc__.strip()) > 0 + ), "_default_embedding_function docstring is empty" + + +def test_docstrings_contain_required_sections(): + """Verify that the query docstring has Args/Parameters and Returns sections and the add docstring has an Args/Parameters section.""" + query_doc = LanceDBAdapter.query.__doc__ + assert query_doc is not None + assert "Args:" in query_doc or "Parameters:" in query_doc, "query docstring should have Args/Parameters section" + assert "Returns:" in query_doc, "query docstring should have Returns section" + + add_doc = LanceDBAdapter.add.__doc__ + assert add_doc is not None + assert "Args:" in add_doc or "Parameters:" in add_doc, "add docstring should have Args/Parameters section"