diff --git a/lib/crewai/src/crewai/llms/providers/gemini/completion.py b/lib/crewai/src/crewai/llms/providers/gemini/completion.py
index b268f07de..962082755 100644
--- a/lib/crewai/src/crewai/llms/providers/gemini/completion.py
+++ b/lib/crewai/src/crewai/llms/providers/gemini/completion.py
@@ -54,15 +54,21 @@ class GeminiCompletion(BaseLLM):
         safety_settings: dict[str, Any] | None = None,
         client_params: dict[str, Any] | None = None,
         interceptor: BaseInterceptor[Any, Any] | None = None,
+        use_vertexai: bool | None = None,
         **kwargs: Any,
     ):
         """Initialize Google Gemini chat completion client.

         Args:
             model: Gemini model name (e.g., 'gemini-2.0-flash-001', 'gemini-1.5-pro')
-            api_key: Google API key (defaults to GOOGLE_API_KEY or GEMINI_API_KEY env var)
-            project: Google Cloud project ID (for Vertex AI)
-            location: Google Cloud location (for Vertex AI, defaults to 'us-central1')
+            api_key: Google API key for Gemini API authentication.
+                Defaults to GOOGLE_API_KEY or GEMINI_API_KEY env var.
+                NOTE: Selects the Gemini API by default; with use_vertexai=True it selects Vertex AI Express mode.
+            project: Google Cloud project ID for Vertex AI with ADC authentication.
+                Requires Application Default Credentials (gcloud auth application-default login).
+                NOTE: Standard Vertex AI uses OAuth2/ADC, not API keys; Express mode is the exception.
+                If both api_key and project are set, api_key takes precedence.
+            location: Google Cloud location (for Vertex AI with ADC, defaults to 'us-central1')
             temperature: Sampling temperature (0-2)
             top_p: Nucleus sampling parameter
             top_k: Top-k sampling parameter
@@ -73,6 +79,12 @@ class GeminiCompletion(BaseLLM):
             client_params: Additional parameters to pass to the Google Gen AI Client constructor.
                 Supports parameters like http_options, credentials, debug_config, etc.
             interceptor: HTTP interceptor (not yet supported for Gemini).
+            use_vertexai: Whether to use Vertex AI instead of the Gemini API.
+                - True: Use Vertex AI (with ADC, or Express mode with an API key)
+                - False: Use the Gemini API (explicitly overriding the env var)
+                - None (default): Check the GOOGLE_GENAI_USE_VERTEXAI env var
+                When using Vertex AI with an API key (Express mode), http_options with
+                api_version="v1" is automatically configured.
             **kwargs: Additional parameters
         """
         if interceptor is not None:
@@ -95,7 +107,8 @@ class GeminiCompletion(BaseLLM):
         self.project = project or os.getenv("GOOGLE_CLOUD_PROJECT")
         self.location = location or os.getenv("GOOGLE_CLOUD_LOCATION") or "us-central1"

-        use_vertexai = os.getenv("GOOGLE_GENAI_USE_VERTEXAI", "").lower() == "true"
+        if use_vertexai is None:
+            use_vertexai = os.getenv("GOOGLE_GENAI_USE_VERTEXAI", "").lower() == "true"

         self.client = self._initialize_client(use_vertexai)

@@ -146,13 +159,34 @@ class GeminiCompletion(BaseLLM):

         Returns:
             Initialized Google Gen AI Client
+
+        Note:
+            The Google Gen AI SDK has two distinct endpoints with different auth requirements:
+            - Gemini API (generativelanguage.googleapis.com): supports API key authentication
+            - Vertex AI (aiplatform.googleapis.com): uses OAuth2/ADC; API keys work only in Express mode
+
+            With vertexai=True plus an API key, the client is configured for Express mode;
+            with vertexai=True and no API key, Application Default Credentials are required.
         """
         client_params = {}

         if self.client_params:
             client_params.update(self.client_params)

-        if use_vertexai or self.project:
+        # Determine authentication mode based on available credentials
+        has_api_key = bool(self.api_key)
+        has_project = bool(self.project)
+
+        if has_api_key and has_project:
+            logging.warning(
+                "Both API key and project provided. Using API key authentication. "
+                "Project/location parameters are ignored when using API keys. "
+                "To use Vertex AI with ADC, remove the api_key parameter."
+            )
+            has_project = False
+
+        # Vertex AI with ADC (project without API key)
+        if (use_vertexai or has_project) and not has_api_key:
             client_params.update(
                 {
                     "vertexai": True,
@@ -161,12 +195,20 @@
                 }
             )

-            client_params.pop("api_key", None)
-
-        elif self.api_key:
+        # API key authentication (works with both Gemini API and Vertex AI Express)
+        elif has_api_key:
             client_params["api_key"] = self.api_key

-            client_params.pop("vertexai", None)
+            # Vertex AI Express mode: API key + vertexai=True + http_options with api_version="v1"
+            # See: https://cloud.google.com/vertex-ai/generative-ai/docs/start/quickstart?usertype=apikey
+            if use_vertexai:
+                client_params["vertexai"] = True
+                client_params["http_options"] = types.HttpOptions(api_version="v1")
+            else:
+                # This ensures we use the Gemini API (generativelanguage.googleapis.com)
+                client_params["vertexai"] = False
+
+            # Clean up project/location (not allowed with API key)
             client_params.pop("project", None)
             client_params.pop("location", None)

@@ -175,10 +217,13 @@

         try:
             return genai.Client(**client_params)
         except Exception as e:
             raise ValueError(
-                "Either GOOGLE_API_KEY/GEMINI_API_KEY (for Gemini API) or "
-                "GOOGLE_CLOUD_PROJECT (for Vertex AI) must be set"
+                "Authentication required. Provide one of:\n"
+                "  1. API key via GOOGLE_API_KEY or GEMINI_API_KEY environment variable\n"
+                "     (add use_vertexai=True to target Vertex AI Express mode)\n"
+                "  2. For Vertex AI with ADC: Set GOOGLE_CLOUD_PROJECT and run:\n"
+                "     gcloud auth application-default login\n"
+                "  3. Pass the api_key parameter directly to the LLM constructor\n"
             ) from e
-
         return genai.Client(**client_params)

     def _get_client_params(self) -> dict[str, Any]:
@@ -202,6 +247,8 @@
                     "location": self.location,
                 }
             )
+            if self.api_key:
+                params["api_key"] = self.api_key
         elif self.api_key:
             params["api_key"] = self.api_key

diff --git a/lib/crewai/tests/cassettes/llms/google/test_google_express_mode_works.yaml b/lib/crewai/tests/cassettes/llms/google/test_google_express_mode_works.yaml
new file mode 100644
index 000000000..2cd20980d
--- /dev/null
+++ b/lib/crewai/tests/cassettes/llms/google/test_google_express_mode_works.yaml
@@ -0,0 +1,75 @@
+interactions:
+- request:
+    body: '{"contents": [{"parts": [{"text": "\nCurrent Task: What is the capital
+      of Japan?\n\nThis is the expected criteria for your final answer: The capital
+      of Japan\nyou MUST return the actual complete content as the final answer, not
+      a summary.\n\nBegin! This is VERY important to you, use the tools available
+      and give your best Final Answer, your job depends on it!\n\nThought:"}], "role":
+      "user"}], "systemInstruction": {"parts": [{"text": "You are Research Assistant.
+      You are a helpful research assistant.\nYour personal goal is: Find information
+      about the capital of Japan\nTo give my best complete final answer to the task
+      respond using the exact following format:\n\nThought: I now can give a great
+      answer\nFinal Answer: Your final answer must be the great and the most complete
+      as possible, it must be outcome described.\n\nI MUST use these formats, my job
+      depends on it!"}], "role": "user"}, "generationConfig": {"stopSequences": ["\nObservation:"]}}'
+    headers:
+      User-Agent:
+      - X-USER-AGENT-XXX
+      accept:
+      - '*/*'
+      accept-encoding:
+      - ACCEPT-ENCODING-XXX
+      connection:
+      - keep-alive
+      content-length:
+      - '952'
+      content-type:
+      - application/json
+      host:
+      - aiplatform.googleapis.com
+      x-goog-api-client:
+      - google-genai-sdk/1.59.0 gl-python/3.13.3
+      x-goog-api-key:
+      - X-GOOG-API-KEY-XXX
+    method: POST
+    uri: https://aiplatform.googleapis.com/v1/publishers/google/models/gemini-2.0-flash-exp:generateContent
+  response:
+    body:
+      string: "{\n  \"candidates\": [\n    {\n      \"content\": {\n        \"role\":
+        \"model\",\n        \"parts\": [\n          {\n            \"text\": \"The
+        capital of Japan is Tokyo.\\nFinal Answer: Tokyo\\n\"\n          }\n        ]\n
+        \      },\n      \"finishReason\": \"STOP\",\n      \"avgLogprobs\": -0.017845841554495003\n
+        \    }\n  ],\n  \"usageMetadata\": {\n    \"promptTokenCount\": 163,\n    \"candidatesTokenCount\":
+        13,\n    \"totalTokenCount\": 176,\n    \"trafficType\": \"ON_DEMAND\",\n
+        \   \"promptTokensDetails\": [\n      {\n        \"modality\": \"TEXT\",\n
+        \        \"tokenCount\": 163\n      }\n    ],\n    \"candidatesTokensDetails\":
+        [\n      {\n        \"modality\": \"TEXT\",\n        \"tokenCount\": 13\n
+        \      }\n    ]\n  },\n  \"modelVersion\": \"gemini-2.0-flash-exp\",\n  \"createTime\":
+        \"2026-01-15T22:27:38.066749Z\",\n  \"responseId\": \"2mlpab2JBNOFidsPh5GigQs\"\n}\n"
+    headers:
+      Alt-Svc:
+      - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000
+      Content-Type:
+      - application/json; charset=UTF-8
+      Date:
+      - Thu, 15 Jan 2026 22:27:38 GMT
+      Server:
+      - scaffolding on HTTPServer2
+      Transfer-Encoding:
+      - chunked
+      Vary:
+      - Origin
+      - X-Origin
+      - Referer
+      X-Content-Type-Options:
+      - X-CONTENT-TYPE-XXX
+      X-Frame-Options:
+      - X-FRAME-OPTIONS-XXX
+      X-XSS-Protection:
+      - '0'
+      content-length:
+      - '786'
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/lib/crewai/tests/llms/google/test_google.py b/lib/crewai/tests/llms/google/test_google.py
index 37f591de6..167215f12 100644
--- a/lib/crewai/tests/llms/google/test_google.py
+++ b/lib/crewai/tests/llms/google/test_google.py
@@ -728,3 +728,39 @@ def test_google_streaming_returns_usage_metrics():
     assert result.token_usage.prompt_tokens > 0
     assert result.token_usage.completion_tokens > 0
     assert result.token_usage.successful_requests >= 1
+
+
+@pytest.mark.vcr()
+def test_google_express_mode_works() -> None:
+    """
+    Test Google Vertex AI Express mode with API key authentication.
+
+    Exercises the Vertex AI Express endpoint (aiplatform.googleapis.com)
+    authenticated with an API key.
+    """
+    with patch.dict(os.environ, {"GOOGLE_GENAI_USE_VERTEXAI": "true"}):
+        agent = Agent(
+            role="Research Assistant",
+            goal="Find information about the capital of Japan",
+            backstory="You are a helpful research assistant.",
+            llm=LLM(
+                model="gemini/gemini-2.0-flash-exp",
+            ),
+            verbose=True,
+        )
+
+        task = Task(
+            description="What is the capital of Japan?",
+            expected_output="The capital of Japan",
+            agent=agent,
+        )
+
+
+        crew = Crew(agents=[agent], tasks=[task])
+        result = crew.kickoff()
+
+        assert result.token_usage is not None
+        assert result.token_usage.total_tokens > 0
+        assert result.token_usage.prompt_tokens > 0
+        assert result.token_usage.completion_tokens > 0
+        assert result.token_usage.successful_requests >= 1
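For reviewers, a minimal usage sketch of the three authentication paths this change distinguishes. It assumes crewai's LLM wrapper forwards api_key, project, location, and use_vertexai through to GeminiCompletion; the test above passes only model and drives Express mode through the GOOGLE_GENAI_USE_VERTEXAI env var. The model name, project ID, and key lookups below are illustrative, not taken from the patch.

import os

from crewai import LLM

# 1. Gemini API (generativelanguage.googleapis.com) with an API key.
gemini_api_llm = LLM(
    model="gemini/gemini-2.0-flash-001",
    api_key=os.environ["GEMINI_API_KEY"],
)

# 2. Vertex AI Express mode: API key plus use_vertexai=True.
#    The client targets aiplatform.googleapis.com with api_version="v1".
express_llm = LLM(
    model="gemini/gemini-2.0-flash-001",
    api_key=os.environ["GOOGLE_API_KEY"],
    use_vertexai=True,
)

# 3. Vertex AI with Application Default Credentials: project/location, no API key.
#    Assumes `gcloud auth application-default login` has been run.
vertex_adc_llm = LLM(
    model="gemini/gemini-2.0-flash-001",
    project="my-gcp-project",  # hypothetical project ID
    location="us-central1",
)

Exporting GOOGLE_GENAI_USE_VERTEXAI=true together with an API key selects Express mode the same way, which is exactly what test_google_express_mode_works exercises.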