# crewAI/crewai_tools/tools/couchbase_tool/couchbase_tool.py

import json
import os
from typing import Any, Optional, Type, List, Dict, Callable

try:
    import couchbase.search as search
    from couchbase.cluster import Cluster
    from couchbase.options import SearchOptions
    from couchbase.vector_search import VectorQuery, VectorSearch

    COUCHBASE_AVAILABLE = True
except ImportError:
    COUCHBASE_AVAILABLE = False
    search = Any
    Cluster = Any
    SearchOptions = Any
    VectorQuery = Any
    VectorSearch = Any

from crewai.tools import BaseTool
from pydantic import BaseModel, Field, SkipValidation


class CouchbaseToolSchema(BaseModel):
    """Argument schema for the Couchbase search tool.

    Declares the single required input, ``query``: the text that is searched
    for in the Couchbase database.
    """

    # Required field (the ``...`` default marks it as having no default value).
    query: str = Field(
        default=...,
        description=(
            "The query to search retrieve relevant information from the "
            "Couchbase database. Pass only the query, not the question."
        ),
    )

class CouchbaseFTSVectorSearchTool(BaseTool):
    """Tool that runs a vector search against a Couchbase Search index.

    The query text is embedded with ``embedding_function`` and the resulting
    vector is searched against ``index_name``, either through the configured
    scope (``scoped_index`` is truthy) or directly through the cluster.

    When the ``couchbase`` package is available, construction validates that
    all required parameters were supplied and that the bucket, scope,
    collection and index exist.
    """

    model_config = {"arbitrary_types_allowed": True}
    name: str = "CouchbaseFTSVectorSearchTool"
    description: str = "A tool to search the Couchbase database for relevant information on internal documents."
    args_schema: Type[BaseModel] = CouchbaseToolSchema
    # Connected Couchbase cluster; required at construction time.
    cluster: SkipValidation[Optional[Cluster]] = None
    # The four names below default to a plain ``None`` (no trailing comma: a
    # trailing comma would make the default the truthy tuple ``(None,)`` and
    # defeat the "must be provided" checks in ``__init__``).
    collection_name: Optional[str] = None
    scope_name: Optional[str] = None
    bucket_name: Optional[str] = None
    index_name: Optional[str] = None
    embedding_key: Optional[str] = Field(
        default="embedding",
        description="Name of the field in the search index that stores the vector"
    )
    scoped_index: Optional[bool] = Field(
        default=True,
        description="Specify whether the index is scoped. Is True by default."
    )
    # Passed to the vector query and also used as the search result limit.
    limit: Optional[int] = Field(default=3)
    # Defaults to None so that ``__init__`` can report a clear error when it
    # is missing; hence the Optional annotation.
    embedding_function: SkipValidation[Optional[Callable[[str], List[float]]]] = Field(
        default=None,
        description="A function that takes a string and returns a list of floats. This is used to embed the query before searching the database."
    )

    def _check_bucket_exists(self) -> bool:
        """Check if the bucket exists in the linked Couchbase cluster.

        Returns:
            True if the bucket manager can fetch ``bucket_name``; False if
            fetching it raises any exception.
        """
        bucket_manager = self.cluster.buckets()
        try:
            bucket_manager.get_bucket(self.bucket_name)
            return True
        except Exception:
            # Any failure to fetch the bucket is reported as "does not exist";
            # the caller turns a False result into a ValueError.
            return False

    def _check_scope_and_collection_exists(self) -> bool:
        """Check if the scope and collection exist in the linked Couchbase bucket.

        Returns:
            True when both the scope and the collection are found.

        Raises:
            ValueError: If the scope is not in the bucket, or the collection
                is not in the scope.
        """
        # Map every scope name in the bucket to the names of its collections.
        scope_collection_map: Dict[str, Any] = {
            scope.name: [collection.name for collection in scope.collections]
            for scope in self._bucket.collections().get_all_scopes()
        }

        # Check if the scope exists
        if self.scope_name not in scope_collection_map:
            raise ValueError(
                f"Scope {self.scope_name} not found in Couchbase "
                f"bucket {self.bucket_name}"
            )

        # Check if the collection exists in the scope
        if self.collection_name not in scope_collection_map[self.scope_name]:
            raise ValueError(
                f"Collection {self.collection_name} not found in scope "
                f"{self.scope_name} in Couchbase bucket {self.bucket_name}"
            )

        return True

    def _check_index_exists(self) -> bool:
        """Check if the Search index exists in the linked Couchbase cluster.

        The scope's index manager is consulted when ``scoped_index`` is
        truthy; otherwise the cluster's index manager is used.

        Returns:
            True when an index named ``index_name`` is found.

        Raises:
            ValueError: If the index does not exist.
        """
        if self.scoped_index:
            index_manager = self._scope.search_indexes()
        else:
            index_manager = self.cluster.search_indexes()

        index_found = any(
            index.name == self.index_name
            for index in index_manager.get_all_indexes()
        )
        if not index_found:
            raise ValueError(
                f"Index {self.index_name} does not exist. "
                " Please create the index before searching."
            )

        return True

    def __init__(self, **kwargs):
        """Initialize the CouchbaseFTSVectorSearchTool.

        Args:
            **kwargs: Keyword arguments to pass to the BaseTool constructor and
                      to configure the Couchbase connection and search parameters.
                      Requires 'cluster', 'bucket_name', 'scope_name',
                      'collection_name', 'index_name', and 'embedding_function'.

        Raises:
            ValueError: If required parameters are missing, the bucket, scope
                        or collection handles cannot be obtained, or the
                        specified bucket, scope, collection, or index does
                        not exist.
            ImportError: If the 'couchbase' package is not installed and the
                         user declines to install it.
        """
        super().__init__(**kwargs)

        if not COUCHBASE_AVAILABLE:
            import click

            if click.confirm(
                "The 'couchbase' package is required to use the CouchbaseFTSVectorSearchTool. "
                "Would you like to install it?"
            ):
                import subprocess

                subprocess.run(["uv", "add", "couchbase"], check=True)
                # NOTE(review): the module-level couchbase names were bound to
                # placeholders at import time and are not re-imported here, and
                # no bucket/scope/collection handles are set up on this path.
                return

            raise ImportError(
                "The 'couchbase' package is required to use the CouchbaseFTSVectorSearchTool. "
                "Please install it with: uv add couchbase"
            )

        # Validate required parameters *before* touching the cluster so the
        # specific message reaches the caller instead of being re-wrapped as
        # a generic connection error.
        required_parameters = (
            (self.cluster, "Cluster instance must be provided"),
            (self.bucket_name, "Bucket name must be provided"),
            (self.scope_name, "Scope name must be provided"),
            (self.collection_name, "Collection name must be provided"),
            (self.index_name, "Index name must be provided"),
            (self.embedding_function, "Embedding function must be provided"),
        )
        for value, message in required_parameters:
            if not value:
                raise ValueError(message)

        try:
            self._bucket = self.cluster.bucket(self.bucket_name)
            self._scope = self._bucket.scope(self.scope_name)
            self._collection = self._scope.collection(self.collection_name)
        except Exception as e:
            raise ValueError(
                "Error connecting to couchbase. "
                "Please check the connection and credentials"
            ) from e

        # check if bucket exists
        if not self._check_bucket_exists():
            raise ValueError(
                f"Bucket {self.bucket_name} does not exist. "
                " Please create the bucket before searching."
            )

        self._check_scope_and_collection_exists()
        self._check_index_exists()

    def _run(self, query: str) -> str:
        """Execute a vector search query against the Couchbase index.

        Args:
            query: The search query string.

        Returns:
            A JSON string containing the ``fields`` of every result row, or
            the string ``"Search failed with error: ..."`` if issuing the
            search or iterating its rows raises.

        Note:
            Exceptions raised by ``embedding_function`` or while building the
            search request are not caught and propagate to the caller.
        """
        query_embedding = self.embedding_function(query)
        fields = ["*"]

        search_req = search.SearchRequest.create(
            VectorSearch.from_vector_query(
                VectorQuery(
                    self.embedding_key,
                    query_embedding,
                    self.limit
                )
            )
        )

        try:
            # Scoped indexes are searched through the scope, others through
            # the cluster; the request and options are the same either way.
            search_target = self._scope if self.scoped_index else self.cluster
            search_iter = search_target.search(
                self.index_name,
                search_req,
                SearchOptions(
                    limit=self.limit,
                    fields=fields,
                )
            )

            json_response = [row.fields for row in search_iter.rows()]
        except Exception as e:
            # Report the failure as the tool's output rather than raising.
            return f"Search failed with error: {e}"

        return json.dumps(json_response, indent=2)