mirror of
https://github.com/crewAIInc/crewAI.git
synced 2025-12-16 04:18:35 +00:00
Add comprehensive docstrings to LanceDB adapter
- Add Google-style docstrings to all public functions and classes in lancedb_adapter.py
- Include Args, Returns, Raises, and Example sections where appropriate
- Add test file to verify docstrings exist for all public API methods
- Addresses issue #3955

Co-Authored-By: João <joao@crewai.com>
This commit is contained in:
@@ -14,6 +14,22 @@ from crewai_tools.tools.rag.rag_tool import Adapter
|
|||||||
|
|
||||||
|
|
||||||
def _default_embedding_function():
|
def _default_embedding_function():
|
||||||
|
"""Create a default embedding function using OpenAI's text-embedding-ada-002 model.
|
||||||
|
|
||||||
|
This function creates and returns an embedding function that uses OpenAI's API
|
||||||
|
to generate embeddings for text inputs. The embedding function is used by the
|
||||||
|
LanceDBAdapter to convert text queries into vector representations for similarity search.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
Callable: A function that takes a list of strings and returns their embeddings
|
||||||
|
as a list of vectors.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
>>> embed_fn = _default_embedding_function()
|
||||||
|
>>> embeddings = embed_fn(["Hello world"])
|
||||||
|
>>> len(embeddings[0]) # Vector dimension
|
||||||
|
1536
|
||||||
|
"""
|
||||||
client = OpenAIClient()
|
client = OpenAIClient()
|
||||||
|
|
||||||
def _embedding_function(input):
|
def _embedding_function(input):
|
||||||
@@ -24,6 +40,32 @@ def _default_embedding_function():
|
|||||||
|
|
||||||
|
|
||||||
class LanceDBAdapter(Adapter):
|
class LanceDBAdapter(Adapter):
|
||||||
|
"""Adapter for integrating LanceDB vector database with CrewAI RAG tools.
|
||||||
|
|
||||||
|
LanceDBAdapter provides a bridge between CrewAI's RAG (Retrieval-Augmented Generation)
|
||||||
|
system and LanceDB, enabling efficient vector similarity search for knowledge retrieval.
|
||||||
|
It handles embedding generation, vector search, and data ingestion with precise control
|
||||||
|
over query parameters and column mappings.
|
||||||
|
|
||||||
|
Attributes:
|
||||||
|
uri: Database connection URI or path to the LanceDB database.
|
||||||
|
table_name: Name of the table to query within the LanceDB database.
|
||||||
|
embedding_function: Function to convert text into embeddings. Defaults to OpenAI's
|
||||||
|
text-embedding-ada-002 model.
|
||||||
|
top_k: Number of top results to return from similarity search. Defaults to 3.
|
||||||
|
vector_column_name: Name of the column containing vector embeddings. Defaults to "vector".
|
||||||
|
text_column_name: Name of the column containing text content. Defaults to "text".
|
||||||
|
|
||||||
|
Example:
|
||||||
|
>>> from crewai_tools.adapters.lancedb_adapter import LanceDBAdapter
|
||||||
|
>>> adapter = LanceDBAdapter(
|
||||||
|
... uri="./my_lancedb",
|
||||||
|
... table_name="documents",
|
||||||
|
... top_k=5
|
||||||
|
... )
|
||||||
|
>>> results = adapter.query("What is machine learning?")
|
||||||
|
>>> print(results)
|
||||||
|
"""
|
||||||
uri: str | Path
|
uri: str | Path
|
||||||
table_name: str
|
table_name: str
|
||||||
embedding_function: Callable = Field(default_factory=_default_embedding_function)
|
embedding_function: Callable = Field(default_factory=_default_embedding_function)
|
||||||
@@ -35,12 +77,44 @@ class LanceDBAdapter(Adapter):
|
|||||||
_table: LanceDBTable = PrivateAttr()
|
_table: LanceDBTable = PrivateAttr()
|
||||||
|
|
||||||
def model_post_init(self, __context: Any) -> None:
    """Connect to LanceDB and open the configured table after model construction.

    Calls ``lancedb_connect(self.uri)`` and stores the connection on
    ``self._db``, opens ``self.table_name`` on that connection and stores the
    table on ``self._table``, then delegates to the parent class's
    ``model_post_init``.

    Args:
        __context: Context value forwarded unchanged to the parent hook.

    Raises:
        Exception: Nothing is caught here, so any error raised by
            ``lancedb_connect`` or ``open_table`` propagates to the caller.
            NOTE(review): presumably this includes a missing table --
            confirm the exact exception type against the LanceDB API.
    """
    self._db = lancedb_connect(self.uri)
    self._table = self._db.open_table(self.table_name)

    # The private attributes are populated before the parent hook runs.
    super().model_post_init(__context)
|
||||||
|
|
||||||
def query(self, question: str) -> str: # type: ignore[override]
|
def query(self, question: str) -> str: # type: ignore[override]
|
||||||
|
"""Perform a vector similarity search for the given question.
|
||||||
|
|
||||||
|
This method converts the input question into an embedding vector and searches
|
||||||
|
the LanceDB table for the most similar entries. It returns the top-k results
|
||||||
|
based on vector similarity, providing precise retrieval for RAG applications.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
question: The text query to search for in the vector database.
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
A string containing the concatenated text results from the top-k most
|
||||||
|
similar entries, separated by newlines.
|
||||||
|
|
||||||
|
Example:
|
||||||
|
>>> adapter = LanceDBAdapter(uri="./db", table_name="docs")
|
||||||
|
>>> results = adapter.query("What is CrewAI?")
|
||||||
|
>>> print(results)
|
||||||
|
CrewAI is a framework for orchestrating AI agents...
|
||||||
|
CrewAI provides precise control over agent workflows...
|
||||||
|
"""
|
||||||
query = self.embedding_function([question])[0]
|
query = self.embedding_function([question])[0]
|
||||||
results = (
|
results = (
|
||||||
self._table.search(query, vector_column_name=self.vector_column_name)
|
self._table.search(query, vector_column_name=self.vector_column_name)
|
||||||
@@ -56,4 +130,23 @@ class LanceDBAdapter(Adapter):
|
|||||||
*args: Any,
|
*args: Any,
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> None:
|
) -> None:
|
||||||
|
"""Add data to the LanceDB table.
|
||||||
|
|
||||||
|
This method provides a direct interface to add new records to the underlying
|
||||||
|
LanceDB table. It accepts the same arguments as the LanceDB table's add method,
|
||||||
|
allowing flexible data ingestion for building knowledge bases.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
*args: Positional arguments to pass to the LanceDB table's add method.
|
||||||
|
**kwargs: Keyword arguments to pass to the LanceDB table's add method.
|
||||||
|
Common kwargs include 'data' (list of records) and 'mode' (append/overwrite).
|
||||||
|
|
||||||
|
Example:
|
||||||
|
>>> adapter = LanceDBAdapter(uri="./db", table_name="docs")
|
||||||
|
>>> data = [
|
||||||
|
... {"text": "CrewAI enables agent collaboration", "vector": [0.1, 0.2, ...]},
|
||||||
|
... {"text": "LanceDB provides vector storage", "vector": [0.3, 0.4, ...]}
|
||||||
|
... ]
|
||||||
|
>>> adapter.add(data=data)
|
||||||
|
"""
|
||||||
self._table.add(*args, **kwargs)
|
self._table.add(*args, **kwargs)
|
||||||
|
|||||||
62
lib/crewai-tools/tests/adapters/test_lancedb_adapter_docs.py
Normal file
62
lib/crewai-tools/tests/adapters/test_lancedb_adapter_docs.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
"""Test that LanceDB adapter has proper docstrings."""
|
||||||
|
|
||||||
|
import inspect
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
lancedb = pytest.importorskip("lancedb")
|
||||||
|
|
||||||
|
from crewai_tools.adapters.lancedb_adapter import (
|
||||||
|
LanceDBAdapter,
|
||||||
|
_default_embedding_function,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def test_lancedb_adapter_class_has_docstring():
    """Check that the LanceDBAdapter class carries a non-blank docstring."""
    class_doc = LanceDBAdapter.__doc__
    assert class_doc is not None, "LanceDBAdapter class is missing a docstring"
    # A whitespace-only docstring strips down to "" and is therefore falsy.
    assert class_doc.strip(), "LanceDBAdapter docstring is empty"
|
||||||
|
|
||||||
|
|
||||||
|
def test_lancedb_adapter_model_post_init_has_docstring():
    """Check that LanceDBAdapter.model_post_init carries a non-blank docstring."""
    hook_doc = LanceDBAdapter.model_post_init.__doc__
    assert hook_doc is not None, "model_post_init method is missing a docstring"
    # A whitespace-only docstring strips down to "" and is therefore falsy.
    assert hook_doc.strip(), "model_post_init docstring is empty"
|
||||||
|
|
||||||
|
|
||||||
|
def test_lancedb_adapter_query_has_docstring():
    """Check that LanceDBAdapter.query carries a non-blank docstring."""
    method_doc = LanceDBAdapter.query.__doc__
    assert method_doc is not None, "query method is missing a docstring"
    # A whitespace-only docstring strips down to "" and is therefore falsy.
    assert method_doc.strip(), "query docstring is empty"
|
||||||
|
|
||||||
|
|
||||||
|
def test_lancedb_adapter_add_has_docstring():
    """Check that LanceDBAdapter.add carries a non-blank docstring."""
    method_doc = LanceDBAdapter.add.__doc__
    assert method_doc is not None, "add method is missing a docstring"
    # A whitespace-only docstring strips down to "" and is therefore falsy.
    assert method_doc.strip(), "add docstring is empty"
|
||||||
|
|
||||||
|
|
||||||
|
def test_default_embedding_function_has_docstring():
    """Check that _default_embedding_function carries a non-blank docstring."""
    factory_doc = _default_embedding_function.__doc__
    assert factory_doc is not None, "_default_embedding_function is missing a docstring"
    # A whitespace-only docstring strips down to "" and is therefore falsy.
    assert factory_doc.strip(), "_default_embedding_function docstring is empty"
|
||||||
|
|
||||||
|
|
||||||
|
def test_docstrings_contain_required_sections():
    """Check that the query and add docstrings include the expected section headers.

    ``query`` must have an Args/Parameters section and a Returns section;
    ``add`` must have an Args/Parameters section.
    """
    # Either spelling of the argument-section header is accepted.
    arg_headers = ("Args:", "Parameters:")

    query_doc = LanceDBAdapter.query.__doc__
    assert query_doc is not None
    assert any(
        header in query_doc for header in arg_headers
    ), "query docstring should have Args/Parameters section"
    assert "Returns:" in query_doc, "query docstring should have Returns section"

    add_doc = LanceDBAdapter.add.__doc__
    assert add_doc is not None
    assert any(
        header in add_doc for header in arg_headers
    ), "add docstring should have Args/Parameters section"
|
||||||
Reference in New Issue
Block a user