Support Vertex AI through API key authentication (Express mode)

2026-01-16 03:28:30 +00:00 · 2026-01-15 14:34:07 -08:00
parent 5645cbb22e
commit 5beaea189b
3 changed files with 170 additions and 12 deletions
--- a/lib/crewai/src/crewai/llms/providers/gemini/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/gemini/completion.py
@@ -54,15 +54,21 @@ class GeminiCompletion(BaseLLM):
        safety_settings: dict[str, Any] | None = None,
        client_params: dict[str, Any] | None = None,
        interceptor: BaseInterceptor[Any, Any] | None = None,
+        use_vertexai: bool | None = None,
        **kwargs: Any,
    ):
        """Initialize Google Gemini chat completion client.

        Args:
            model: Gemini model name (e.g., 'gemini-2.0-flash-001', 'gemini-1.5-pro')
-            api_key: Google API key (defaults to GOOGLE_API_KEY or GEMINI_API_KEY env var)
-            project: Google Cloud project ID (for Vertex AI)
-            location: Google Cloud location (for Vertex AI, defaults to 'us-central1')
+            api_key: Google API key for the Gemini API or Vertex AI Express mode.
+                    Defaults to GOOGLE_API_KEY or GEMINI_API_KEY env var.
+                    NOTE: When an API key is set, project and location are ignored.
+            project: Google Cloud project ID for Vertex AI with ADC authentication.
+                    Requires Application Default Credentials (gcloud auth application-default login).
+                    NOTE: Project-based Vertex AI uses OAuth2/ADC; API keys use Express mode instead.
+                    If both api_key and project are set, api_key takes precedence.
+            location: Google Cloud location (for Vertex AI with ADC, defaults to 'us-central1')
            temperature: Sampling temperature (0-2)
            top_p: Nucleus sampling parameter
            top_k: Top-k sampling parameter
@@ -73,6 +79,12 @@ class GeminiCompletion(BaseLLM):
            client_params: Additional parameters to pass to the Google Gen AI Client constructor.
                          Supports parameters like http_options, credentials, debug_config, etc.
            interceptor: HTTP interceptor (not yet supported for Gemini).
+            use_vertexai: Whether to use Vertex AI instead of Gemini API.
+                         - True: Use Vertex AI (with ADC or Express mode with API key)
+                         - False: Ignore the env var; Gemini API with an API key (a project alone still selects Vertex AI)
+                         - None (default): Check GOOGLE_GENAI_USE_VERTEXAI env var
+                         When using Vertex AI with API key (Express mode), http_options with
+                         api_version="v1" is automatically configured.
            **kwargs: Additional parameters
        """
        if interceptor is not None:
@@ -95,7 +107,8 @@ class GeminiCompletion(BaseLLM):
        self.project = project or os.getenv("GOOGLE_CLOUD_PROJECT")
        self.location = location or os.getenv("GOOGLE_CLOUD_LOCATION") or "us-central1"

-        use_vertexai = os.getenv("GOOGLE_GENAI_USE_VERTEXAI", "").lower() == "true"
+        if use_vertexai is None:
+            use_vertexai = os.getenv("GOOGLE_GENAI_USE_VERTEXAI", "").lower() == "true"

        self.client = self._initialize_client(use_vertexai)

@@ -146,13 +159,34 @@ class GeminiCompletion(BaseLLM):

        Returns:
            Initialized Google Gen AI Client
+
+        Note:
+            Google Gen AI SDK has two distinct endpoints with different auth requirements:
+            - Gemini API (generativelanguage.googleapis.com): Supports API key authentication
+            - Vertex AI (aiplatform.googleapis.com): OAuth2/ADC, or an API key in Express mode
+
+            When vertexai=True is set, requests are routed to aiplatform.googleapis.com. With an
+            API key this is Vertex AI Express mode; without one, project/location and ADC are used.
        """
        client_params = {}

        if self.client_params:
            client_params.update(self.client_params)

-        if use_vertexai or self.project:
+        # Determine authentication mode based on available credentials
+        has_api_key = bool(self.api_key)
+        has_project = bool(self.project)
+
+        if has_api_key and has_project:
+            logging.warning(
+                "Both API key and project provided. Using API key authentication. "
+                "Project/location parameters are ignored when using API keys. "
+                "To use Vertex AI with ADC, remove the api_key parameter."
+            )
+            has_project = False
+
+        # Vertex AI with ADC (project without API key)
+        if (use_vertexai or has_project) and not has_api_key:
            client_params.update(
                {
                    "vertexai": True,
@@ -161,12 +195,20 @@ class GeminiCompletion(BaseLLM):
                }
            )

-            client_params.pop("api_key", None)
-
-        elif self.api_key:
+        # API key authentication (works with both Gemini API and Vertex AI Express)
+        elif has_api_key:
            client_params["api_key"] = self.api_key

-            client_params.pop("vertexai", None)
+            # Vertex AI Express mode: API key + vertexai=True + http_options with api_version="v1"
+            # See: https://cloud.google.com/vertex-ai/generative-ai/docs/start/quickstart?usertype=apikey
+            if use_vertexai:
+                client_params["vertexai"] = True
+                client_params["http_options"] = types.HttpOptions(api_version="v1")
+            else:
+                # This ensures we use the Gemini API (generativelanguage.googleapis.com)
+                client_params["vertexai"] = False
+
+            # Clean up project/location (not allowed with API key)
            client_params.pop("project", None)
            client_params.pop("location", None)

@@ -175,10 +217,13 @@ class GeminiCompletion(BaseLLM):
                return genai.Client(**client_params)
            except Exception as e:
                raise ValueError(
-                    "Either GOOGLE_API_KEY/GEMINI_API_KEY (for Gemini API) or "
-                    "GOOGLE_CLOUD_PROJECT (for Vertex AI) must be set"
+                    "Authentication required. Provide one of:\n"
+                    "  1. API key via GOOGLE_API_KEY or GEMINI_API_KEY environment variable\n"
+                    "     (use_vertexai=True is optional for Vertex AI with API key)\n"
+                    "  2. For Vertex AI with ADC: Set GOOGLE_CLOUD_PROJECT and run:\n"
+                    "     gcloud auth application-default login\n"
+                    "  3. Pass api_key parameter directly to LLM constructor\n"
                ) from e
-
        return genai.Client(**client_params)

    def _get_client_params(self) -> dict[str, Any]:
@@ -202,6 +247,8 @@ class GeminiCompletion(BaseLLM):
                    "location": self.location,
                }
            )
+            if self.api_key:
+                params["api_key"] = self.api_key
        elif self.api_key:
            params["api_key"] = self.api_key

--- a/lib/crewai/tests/cassettes/llms/google/test_google_express_mode_works.yaml
+++ b/lib/crewai/tests/cassettes/llms/google/test_google_express_mode_works.yaml
@@ -0,0 +1,75 @@
+interactions:
+- request:
+    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: What is the capital
+      of Japan?\n\nThis is the expected criteria for your final answer: The capital
+      of Japan\nyou MUST return the actual complete content as the final answer, not
+      a summary.\n\nBegin! This is VERY important to you, use the tools available
+      and give your best Final Answer, your job depends on it!\n\nThought:"}], "role":
+      "user"}], "systemInstruction": {"parts": [{"text": "You are Research Assistant.
+      You are a helpful research assistant.\nYour personal goal is: Find information
+      about the capital of Japan\nTo give my best complete final answer to the task
+      respond using the exact following format:\n\nThought: I now can give a great
+      answer\nFinal Answer: Your final answer must be the great and the most complete
+      as possible, it must be outcome described.\n\nI MUST use these formats, my job
+      depends on it!"}], "role": "user"}, "generationConfig": {"stopSequences": ["\nObservation:"]}}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - '*/*'
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '952'
+      content-type:
+      - application/json
+      host:
+      - aiplatform.googleapis.com
+      x-goog-api-client:
+      - google-genai-sdk/1.59.0 gl-python/3.13.3
+      x-goog-api-key:
+      - X-GOOG-API-KEY-XXX
+    method: POST
+    uri: https://aiplatform.googleapis.com/v1/publishers/google/models/gemini-2.0-flash-exp:generateContent
+  response:
+    body:
+      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"role\":
+        \"model\",\n        \"parts\": [\n          {\n            \"text\": \"The
+        capital of Japan is Tokyo.\\nFinal Answer: Tokyo\\n\"\n          }\n        ]\n
+        \     },\n      \"finishReason\": \"STOP\",\n      \"avgLogprobs\": -0.017845841554495003\n
+        \   }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\": 163,\n    \"candidatesTokenCount\":
+        13,\n    \"totalTokenCount\": 176,\n    \"trafficType\": \"ON_DEMAND\",\n
+        \   \"promptTokensDetails\": [\n      {\n        \"modality\": \"TEXT\",\n
+        \       \"tokenCount\": 163\n      }\n    ],\n    \"candidatesTokensDetails\":
+        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 13\n
+        \     }\n    ]\n  },\n  \"modelVersion\": \"gemini-2.0-flash-exp\",\n  \"createTime\":
+        \"2026-01-15T22:27:38.066749Z\",\n  \"responseId\": \"2mlpab2JBNOFidsPh5GigQs\"\n}\n"
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Type:
+      - application/json; charset=UTF-8
+      Date:
+      - Thu, 15 Jan 2026 22:27:38 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      X-Frame-Options:
+      - X-FRAME-OPTIONS-XXX
+      X-XSS-Protection:
+      - '0'
+      content-length:
+      - '786'
+    status:
+      code: 200
+      message: OK
+version: 1
--- a/lib/crewai/tests/llms/google/test_google.py
+++ b/lib/crewai/tests/llms/google/test_google.py
@@ -728,3 +728,39 @@ def test_google_streaming_returns_usage_metrics():
    assert result.token_usage.prompt_tokens > 0
    assert result.token_usage.completion_tokens > 0
    assert result.token_usage.successful_requests >= 1
+
+
+@pytest.mark.vcr()
+def test_google_express_mode_works() -> None:
+    """
+    Test Google Vertex AI Express mode with API key authentication.
+    Setting GOOGLE_GENAI_USE_VERTEXAI=true routes the request to
+    aiplatform.googleapis.com while still authenticating with an API key.
+
+    """
+    with patch.dict(os.environ, {"GOOGLE_GENAI_USE_VERTEXAI": "true"}):
+        agent = Agent(
+            role="Research Assistant",
+            goal="Find information about the capital of Japan",
+            backstory="You are a helpful research assistant.",
+            llm=LLM(
+                model="gemini/gemini-2.0-flash-exp",
+            ),
+            verbose=True,
+        )
+
+        task = Task(
+            description="What is the capital of Japan?",
+            expected_output="The capital of Japan",
+            agent=agent,
+        )
+
+
+        crew = Crew(agents=[agent], tasks=[task])
+        result = crew.kickoff()
+
+        assert result.token_usage is not None
+        assert result.token_usage.total_tokens > 0
+        assert result.token_usage.prompt_tokens > 0
+        assert result.token_usage.completion_tokens > 0
+        assert result.token_usage.successful_requests >= 1