Add comprehensive docstrings to LanceDB adapter

- Add Google-style docstrings to all public functions and classes in lancedb_adapter.py
- Include Args, Returns, Raises, and Example sections where appropriate
- Add test file to verify docstrings exist for all public API methods
- Addresses issue #3955

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
Devin AI
2025-11-19 17:58:31 +00:00
parent d160f0874a
commit 88d93cd65b
2 changed files with 155 additions and 0 deletions

View File

@@ -14,6 +14,22 @@ from crewai_tools.tools.rag.rag_tool import Adapter
def _default_embedding_function(): def _default_embedding_function():
"""Create a default embedding function using OpenAI's text-embedding-ada-002 model.
This function creates and returns an embedding function that uses OpenAI's API
to generate embeddings for text inputs. The embedding function is used by the
LanceDBAdapter to convert text queries into vector representations for similarity search.
Returns:
Callable: A function that takes a list of strings and returns their embeddings
as a list of vectors.
Example:
>>> embed_fn = _default_embedding_function()
>>> embeddings = embed_fn(["Hello world"])
>>> len(embeddings[0]) # Vector dimension
1536
"""
client = OpenAIClient() client = OpenAIClient()
def _embedding_function(input): def _embedding_function(input):
@@ -24,6 +40,32 @@ def _default_embedding_function():
class LanceDBAdapter(Adapter): class LanceDBAdapter(Adapter):
"""Adapter for integrating LanceDB vector database with CrewAI RAG tools.
LanceDBAdapter provides a bridge between CrewAI's RAG (Retrieval-Augmented Generation)
system and LanceDB, enabling efficient vector similarity search for knowledge retrieval.
It handles embedding generation, vector search, and data ingestion with precise control
over query parameters and column mappings.
Attributes:
uri: Database connection URI or path to the LanceDB database.
table_name: Name of the table to query within the LanceDB database.
embedding_function: Function to convert text into embeddings. Defaults to OpenAI's
text-embedding-ada-002 model.
top_k: Number of top results to return from similarity search. Defaults to 3.
vector_column_name: Name of the column containing vector embeddings. Defaults to "vector".
text_column_name: Name of the column containing text content. Defaults to "text".
Example:
>>> from crewai_tools.adapters.lancedb_adapter import LanceDBAdapter
>>> adapter = LanceDBAdapter(
... uri="./my_lancedb",
... table_name="documents",
... top_k=5
... )
>>> results = adapter.query("What is machine learning?")
>>> print(results)
"""
uri: str | Path uri: str | Path
table_name: str table_name: str
embedding_function: Callable = Field(default_factory=_default_embedding_function) embedding_function: Callable = Field(default_factory=_default_embedding_function)
@@ -35,12 +77,44 @@ class LanceDBAdapter(Adapter):
_table: LanceDBTable = PrivateAttr() _table: LanceDBTable = PrivateAttr()
def model_post_init(self, __context: Any) -> None:
    """Connect to LanceDB and open the configured table.

    Runs as the Pydantic post-initialisation hook. It connects with
    ``lancedb_connect(self.uri)``, opens ``self.table_name`` on that
    connection, keeps both handles on ``self._db`` and ``self._table``,
    and finally delegates to the parent class's ``model_post_init``.

    Args:
        __context: Value supplied to the hook; forwarded unchanged to the
            parent implementation.

    Note:
        Nothing is caught here, so whatever ``lancedb_connect`` or
        ``open_table`` raise propagates to the caller (presumably for an
        unreachable database or a missing table -- confirm against the
        LanceDB documentation).
    """
    connection = lancedb_connect(self.uri)
    self._db = connection
    # The table handle is opened once here and kept for later use.
    self._table = connection.open_table(self.table_name)
    super().model_post_init(__context)
def query(self, question: str) -> str: # type: ignore[override] def query(self, question: str) -> str: # type: ignore[override]
"""Perform a vector similarity search for the given question.
This method converts the input question into an embedding vector and searches
the LanceDB table for the most similar entries. It returns the top-k results
based on vector similarity, providing precise retrieval for RAG applications.
Args:
question: The text query to search for in the vector database.
Returns:
A string containing the concatenated text results from the top-k most
similar entries, separated by newlines.
Example:
>>> adapter = LanceDBAdapter(uri="./db", table_name="docs")
>>> results = adapter.query("What is CrewAI?")
>>> print(results)
CrewAI is a framework for orchestrating AI agents...
CrewAI provides precise control over agent workflows...
"""
query = self.embedding_function([question])[0] query = self.embedding_function([question])[0]
results = ( results = (
self._table.search(query, vector_column_name=self.vector_column_name) self._table.search(query, vector_column_name=self.vector_column_name)
@@ -56,4 +130,23 @@ class LanceDBAdapter(Adapter):
*args: Any, *args: Any,
**kwargs: Any, **kwargs: Any,
) -> None: ) -> None:
"""Add data to the LanceDB table.
This method provides a direct interface to add new records to the underlying
LanceDB table. It accepts the same arguments as the LanceDB table's add method,
allowing flexible data ingestion for building knowledge bases.
Args:
*args: Positional arguments to pass to the LanceDB table's add method.
**kwargs: Keyword arguments to pass to the LanceDB table's add method.
Common kwargs include 'data' (list of records) and 'mode' (append/overwrite).
Example:
>>> adapter = LanceDBAdapter(uri="./db", table_name="docs")
>>> data = [
... {"text": "CrewAI enables agent collaboration", "vector": [0.1, 0.2, ...]},
... {"text": "LanceDB provides vector storage", "vector": [0.3, 0.4, ...]}
... ]
>>> adapter.add(data=data)
"""
self._table.add(*args, **kwargs) self._table.add(*args, **kwargs)

View File

@@ -0,0 +1,62 @@
"""Test that LanceDB adapter has proper docstrings."""
import inspect
import pytest
lancedb = pytest.importorskip("lancedb")
from crewai_tools.adapters.lancedb_adapter import (
LanceDBAdapter,
_default_embedding_function,
)
def test_lancedb_adapter_class_has_docstring():
    """Check that the LanceDBAdapter class carries a non-empty docstring."""
    class_doc = LanceDBAdapter.__doc__
    assert class_doc is not None, "LanceDBAdapter class is missing a docstring"
    # A docstring made only of whitespace counts as empty.
    assert class_doc.strip() != "", "LanceDBAdapter docstring is empty"
def test_lancedb_adapter_model_post_init_has_docstring():
    """Check that LanceDBAdapter.model_post_init carries a non-empty docstring."""
    hook_doc = LanceDBAdapter.model_post_init.__doc__
    assert hook_doc is not None, "model_post_init method is missing a docstring"
    # A docstring made only of whitespace counts as empty.
    assert hook_doc.strip() != "", "model_post_init docstring is empty"
def test_lancedb_adapter_query_has_docstring():
    """Check that LanceDBAdapter.query carries a non-empty docstring."""
    query_doc = LanceDBAdapter.query.__doc__
    assert query_doc is not None, "query method is missing a docstring"
    # A docstring made only of whitespace counts as empty.
    assert query_doc.strip() != "", "query docstring is empty"
def test_lancedb_adapter_add_has_docstring():
    """Check that LanceDBAdapter.add carries a non-empty docstring."""
    add_doc = LanceDBAdapter.add.__doc__
    assert add_doc is not None, "add method is missing a docstring"
    # A docstring made only of whitespace counts as empty.
    assert add_doc.strip() != "", "add docstring is empty"
def test_default_embedding_function_has_docstring():
    """Check that _default_embedding_function carries a non-empty docstring."""
    factory_doc = _default_embedding_function.__doc__
    assert factory_doc is not None, "_default_embedding_function is missing a docstring"
    # A docstring made only of whitespace counts as empty.
    assert factory_doc.strip() != "", "_default_embedding_function docstring is empty"
def test_docstrings_contain_required_sections():
    """Check the section headers in the ``query`` and ``add`` docstrings.

    ``query`` must document its arguments (an ``Args:`` or ``Parameters:``
    header) and carry a ``Returns:`` header; ``add`` must document its
    arguments. No other sections are inspected here.
    """
    # Either header is accepted as the argument section.
    argument_headers = ("Args:", "Parameters:")

    query_doc = LanceDBAdapter.query.__doc__
    assert query_doc is not None
    assert any(
        header in query_doc for header in argument_headers
    ), "query docstring should have Args/Parameters section"
    assert "Returns:" in query_doc, "query docstring should have Returns section"

    add_doc = LanceDBAdapter.add.__doc__
    assert add_doc is not None
    assert any(
        header in add_doc for header in argument_headers
    ), "add docstring should have Args/Parameters section"