mirror of
https://github.com/crewAIInc/crewAI.git
synced 2025-12-16 04:18:35 +00:00
Add comprehensive docstrings to LanceDB adapter
- Add Google-style docstrings to all public functions and classes in lancedb_adapter.py
- Include Args, Returns, Raises, and Example sections where appropriate
- Add test file to verify docstrings exist for all public API methods
- Addresses issue #3955

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
@@ -14,6 +14,22 @@ from crewai_tools.tools.rag.rag_tool import Adapter
|
||||
|
||||
|
||||
def _default_embedding_function():
|
||||
"""Create a default embedding function using OpenAI's text-embedding-ada-002 model.
|
||||
|
||||
This function creates and returns an embedding function that uses OpenAI's API
|
||||
to generate embeddings for text inputs. The embedding function is used by the
|
||||
LanceDBAdapter to convert text queries into vector representations for similarity search.
|
||||
|
||||
Returns:
|
||||
Callable: A function that takes a list of strings and returns their embeddings
|
||||
as a list of vectors.
|
||||
|
||||
Example:
|
||||
>>> embed_fn = _default_embedding_function()
|
||||
>>> embeddings = embed_fn(["Hello world"])
|
||||
>>> len(embeddings[0]) # Vector dimension
|
||||
1536
|
||||
"""
|
||||
client = OpenAIClient()
|
||||
|
||||
def _embedding_function(input):
|
||||
@@ -24,6 +40,32 @@ def _default_embedding_function():
|
||||
|
||||
|
||||
class LanceDBAdapter(Adapter):
|
||||
"""Adapter for integrating LanceDB vector database with CrewAI RAG tools.
|
||||
|
||||
LanceDBAdapter provides a bridge between CrewAI's RAG (Retrieval-Augmented Generation)
|
||||
system and LanceDB, enabling efficient vector similarity search for knowledge retrieval.
|
||||
It handles embedding generation, vector search, and data ingestion with precise control
|
||||
over query parameters and column mappings.
|
||||
|
||||
Attributes:
|
||||
uri: Database connection URI or path to the LanceDB database.
|
||||
table_name: Name of the table to query within the LanceDB database.
|
||||
embedding_function: Function to convert text into embeddings. Defaults to OpenAI's
|
||||
text-embedding-ada-002 model.
|
||||
top_k: Number of top results to return from similarity search. Defaults to 3.
|
||||
vector_column_name: Name of the column containing vector embeddings. Defaults to "vector".
|
||||
text_column_name: Name of the column containing text content. Defaults to "text".
|
||||
|
||||
Example:
|
||||
>>> from crewai_tools.adapters.lancedb_adapter import LanceDBAdapter
|
||||
>>> adapter = LanceDBAdapter(
|
||||
... uri="./my_lancedb",
|
||||
... table_name="documents",
|
||||
... top_k=5
|
||||
... )
|
||||
>>> results = adapter.query("What is machine learning?")
|
||||
>>> print(results)
|
||||
"""
|
||||
uri: str | Path
|
||||
table_name: str
|
||||
embedding_function: Callable = Field(default_factory=_default_embedding_function)
|
||||
@@ -35,12 +77,44 @@ class LanceDBAdapter(Adapter):
|
||||
_table: LanceDBTable = PrivateAttr()
|
||||
|
||||
def model_post_init(self, __context: Any) -> None:
    """Open the LanceDB connection and table once the model has been built.

    Connects to the database at ``self.uri``, keeps the connection on
    ``self._db``, opens ``self.table_name`` on it and keeps the table on
    ``self._table``, then delegates to the parent class's hook.

    Args:
        __context: Context value handed to the hook; forwarded unchanged
            to the parent implementation.

    Raises:
        Exception: Nothing is caught here, so any error raised while
            connecting or opening the table propagates to the caller.
    """
    database = lancedb_connect(self.uri)
    self._db = database
    # The table handle is resolved from the connection that was just stored.
    self._table = database.open_table(self.table_name)

    super().model_post_init(__context)
|
||||
|
||||
def query(self, question: str) -> str: # type: ignore[override]
|
||||
"""Perform a vector similarity search for the given question.
|
||||
|
||||
This method converts the input question into an embedding vector and searches
|
||||
the LanceDB table for the most similar entries. It returns the top-k results
|
||||
based on vector similarity, providing precise retrieval for RAG applications.
|
||||
|
||||
Args:
|
||||
question: The text query to search for in the vector database.
|
||||
|
||||
Returns:
|
||||
A string containing the concatenated text results from the top-k most
|
||||
similar entries, separated by newlines.
|
||||
|
||||
Example:
|
||||
>>> adapter = LanceDBAdapter(uri="./db", table_name="docs")
|
||||
>>> results = adapter.query("What is CrewAI?")
|
||||
>>> print(results)
|
||||
CrewAI is a framework for orchestrating AI agents...
|
||||
CrewAI provides precise control over agent workflows...
|
||||
"""
|
||||
query = self.embedding_function([question])[0]
|
||||
results = (
|
||||
self._table.search(query, vector_column_name=self.vector_column_name)
|
||||
@@ -56,4 +130,23 @@ class LanceDBAdapter(Adapter):
|
||||
*args: Any,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
"""Add data to the LanceDB table.
|
||||
|
||||
This method provides a direct interface to add new records to the underlying
|
||||
LanceDB table. It accepts the same arguments as the LanceDB table's add method,
|
||||
allowing flexible data ingestion for building knowledge bases.
|
||||
|
||||
Args:
|
||||
*args: Positional arguments to pass to the LanceDB table's add method.
|
||||
**kwargs: Keyword arguments to pass to the LanceDB table's add method.
|
||||
Common kwargs include 'data' (list of records) and 'mode' (append/overwrite).
|
||||
|
||||
Example:
|
||||
>>> adapter = LanceDBAdapter(uri="./db", table_name="docs")
|
||||
>>> data = [
|
||||
... {"text": "CrewAI enables agent collaboration", "vector": [0.1, 0.2, ...]},
|
||||
... {"text": "LanceDB provides vector storage", "vector": [0.3, 0.4, ...]}
|
||||
... ]
|
||||
>>> adapter.add(data=data)
|
||||
"""
|
||||
self._table.add(*args, **kwargs)
|
||||
|
||||
62
lib/crewai-tools/tests/adapters/test_lancedb_adapter_docs.py
Normal file
62
lib/crewai-tools/tests/adapters/test_lancedb_adapter_docs.py
Normal file
@@ -0,0 +1,62 @@
|
||||
"""Test that LanceDB adapter has proper docstrings."""
|
||||
|
||||
import inspect
|
||||
|
||||
import pytest
|
||||
|
||||
lancedb = pytest.importorskip("lancedb")
|
||||
|
||||
from crewai_tools.adapters.lancedb_adapter import (
|
||||
LanceDBAdapter,
|
||||
_default_embedding_function,
|
||||
)
|
||||
|
||||
|
||||
def test_lancedb_adapter_class_has_docstring():
    """Check that the LanceDBAdapter class carries a non-blank docstring."""
    class_doc = LanceDBAdapter.__doc__
    # A missing docstring is reported separately from a whitespace-only one.
    assert class_doc is not None, "LanceDBAdapter class is missing a docstring"
    assert class_doc.strip(), "LanceDBAdapter docstring is empty"
|
||||
|
||||
|
||||
def test_lancedb_adapter_model_post_init_has_docstring():
    """Check that LanceDBAdapter.model_post_init carries a non-blank docstring."""
    hook_doc = LanceDBAdapter.model_post_init.__doc__
    # A missing docstring is reported separately from a whitespace-only one.
    assert hook_doc is not None, "model_post_init method is missing a docstring"
    assert hook_doc.strip(), "model_post_init docstring is empty"
|
||||
|
||||
|
||||
def test_lancedb_adapter_query_has_docstring():
    """Check that LanceDBAdapter.query carries a non-blank docstring."""
    query_doc = LanceDBAdapter.query.__doc__
    # A missing docstring is reported separately from a whitespace-only one.
    assert query_doc is not None, "query method is missing a docstring"
    assert query_doc.strip(), "query docstring is empty"
|
||||
|
||||
|
||||
def test_lancedb_adapter_add_has_docstring():
    """Check that LanceDBAdapter.add carries a non-blank docstring."""
    add_doc = LanceDBAdapter.add.__doc__
    # A missing docstring is reported separately from a whitespace-only one.
    assert add_doc is not None, "add method is missing a docstring"
    assert add_doc.strip(), "add docstring is empty"
|
||||
|
||||
|
||||
def test_default_embedding_function_has_docstring():
    """Check that _default_embedding_function carries a non-blank docstring."""
    factory_doc = _default_embedding_function.__doc__
    # A missing docstring is reported separately from a whitespace-only one.
    assert factory_doc is not None, "_default_embedding_function is missing a docstring"
    assert factory_doc.strip(), "_default_embedding_function docstring is empty"
|
||||
|
||||
|
||||
def test_docstrings_contain_required_sections():
    """Check the query and add docstrings for their expected section headers.

    ``query`` must document its arguments and its return value; ``add`` must
    document its arguments. Either ``Args:`` or ``Parameters:`` is accepted
    as the argument-section header.
    """
    arg_headers = ("Args:", "Parameters:")

    query_text = LanceDBAdapter.query.__doc__
    assert query_text is not None
    assert any(
        header in query_text for header in arg_headers
    ), "query docstring should have Args/Parameters section"
    assert "Returns:" in query_text, "query docstring should have Returns section"

    add_text = LanceDBAdapter.add.__doc__
    assert add_text is not None
    assert any(
        header in add_text for header in arg_headers
    ), "add docstring should have Args/Parameters section"
|
||||
Reference in New Issue
Block a user