diff --git a/docs/concepts/llms.mdx b/docs/concepts/llms.mdx
index 1f01ca0eb..8f895ca61 100644
--- a/docs/concepts/llms.mdx
+++ b/docs/concepts/llms.mdx
@@ -29,7 +29,7 @@ Large Language Models (LLMs) are the core intelligence behind CrewAI agents. The
 ## Available Models and Their Capabilities
 
-Here's a detailed breakdown of supported models and their capabilities:
+Here's a detailed breakdown of supported models and their capabilities (you can compare performance at [lmarena.ai](https://lmarena.ai/)):
 
@@ -43,6 +43,17 @@ Here's a detailed breakdown of supported models and their capabilities:
     1 token ≈ 4 characters in English. For example, 8,192 tokens ≈ 32,768 characters or about 6,000 words.
 
+
+  | Model | Context Window | Best For |
+  |-------|---------------|-----------|
+  | Gemini 1.5 Flash | 1M tokens | Balanced multimodal model, good for most tasks |
+  | Gemini 1.5 Flash 8B | 1M tokens | Fastest and most cost-efficient, good for high-frequency tasks |
+  | Gemini 1.5 Pro | 2M tokens | Best performing; handles a wide variety of reasoning tasks, including logical reasoning, coding, and creative collaboration |
+
+
+  Google's Gemini models are all multimodal, supporting audio, images, video, and text, with support for context caching, JSON schema output, function calling, and more.
+
+
   | Model | Context Window | Best For |
   |-------|---------------|-----------|
@@ -128,10 +128,10 @@ There are three ways to configure LLMs in CrewAI. Choose the method that best fi
       # llm: anthropic/claude-2.1
       # llm: anthropic/claude-2.0
 
-      # Google Models - Good for general tasks
-      # llm: gemini/gemini-pro
+      # Google Models - Strong reasoning, large cacheable context window, multimodal
       # llm: gemini/gemini-1.5-pro-latest
-      # llm: gemini/gemini-1.0-pro-latest
+      # llm: gemini/gemini-1.5-flash-latest
+      # llm: gemini/gemini-1.5-flash-8b-latest
 
       # AWS Bedrock Models - Enterprise-grade
       # llm: bedrock/anthropic.claude-3-sonnet-20240229-v1:0
@@ -350,13 +361,18 @@ Learn how to get the most out of your LLM configuration:
 
      ```python Code
+      # Option 1. Gemini accessed with an API key.
+      # https://ai.google.dev/gemini-api/docs/api-key
       GEMINI_API_KEY=
+
+      # Option 2. Vertex AI IAM credentials for Gemini, Anthropic, and anything in the Model Garden.
+      # https://cloud.google.com/vertex-ai/generative-ai/docs/overview
      ```
 
      Example usage:
 
      ```python Code
      llm = LLM(
-         model="gemini/gemini-pro",
+         model="gemini/gemini-1.5-pro-latest",
          temperature=0.7
      )
      ```
diff --git a/src/crewai/llm.py b/src/crewai/llm.py
index d860a8907..6d90448b9 100644
--- a/src/crewai/llm.py
+++ b/src/crewai/llm.py
@@ -43,6 +43,10 @@ LLM_CONTEXT_WINDOW_SIZES = {
     "gpt-4-turbo": 128000,
     "o1-preview": 128000,
     "o1-mini": 128000,
+    # gemini
+    "gemini-1.5-pro": 2097152,
+    "gemini-1.5-flash": 1048576,
+    "gemini-1.5-flash-8b": 1048576,
     # deepseek
     "deepseek-chat": 128000,
     # groq
@@ -61,6 +65,9 @@ LLM_CONTEXT_WINDOW_SIZES = {
     "mixtral-8x7b-32768": 32768,
 }
 
+DEFAULT_CONTEXT_WINDOW_SIZE = 8192
+CONTEXT_WINDOW_USAGE_RATIO = 0.75
+
 
 @contextmanager
 def suppress_warnings():
@@ -124,6 +131,7 @@ class LLM:
         self.api_version = api_version
         self.api_key = api_key
         self.callbacks = callbacks
+        self.context_window_size = None
         self.kwargs = kwargs
 
         litellm.drop_params = True
@@ -191,7 +199,16 @@
 
     def get_context_window_size(self) -> int:
         # Only using 75% of the context window size to avoid cutting the message in the middle
-        return int(LLM_CONTEXT_WINDOW_SIZES.get(self.model, 8192) * 0.75)
+        if self.context_window_size is not None:
+            return self.context_window_size
+
+        self.context_window_size = int(
+            DEFAULT_CONTEXT_WINDOW_SIZE * CONTEXT_WINDOW_USAGE_RATIO
+        )
+        for key, value in LLM_CONTEXT_WINDOW_SIZES.items():
+            if self.model.startswith(key):
+                self.context_window_size = int(value * CONTEXT_WINDOW_USAGE_RATIO)
+        return self.context_window_size
 
     def set_callbacks(self, callbacks: List[Any]):
         callback_types = [type(callback) for callback in callbacks]
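
For reviewers, here is a minimal, self-contained sketch of the context-window lookup this patch introduces. `MiniLLM` and the trimmed `LLM_CONTEXT_WINDOW_SIZES` table below are illustrative stand-ins, not part of the patch itself:

```python
# Standalone sketch of the new lookup: prefix-match the model name against
# known context window sizes, apply the 75% safety ratio, and cache the
# result after the first call.

LLM_CONTEXT_WINDOW_SIZES = {
    "gpt-4": 8192,
    "gemini-1.5-pro": 2097152,
    "gemini-1.5-flash": 1048576,
    "gemini-1.5-flash-8b": 1048576,
}

DEFAULT_CONTEXT_WINDOW_SIZE = 8192
CONTEXT_WINDOW_USAGE_RATIO = 0.75


class MiniLLM:
    def __init__(self, model: str):
        self.model = model
        self.context_window_size = None  # computed lazily, then cached

    def get_context_window_size(self) -> int:
        if self.context_window_size is not None:
            return self.context_window_size  # reuse the cached value

        # Start from the default so unknown models still get a sane size.
        self.context_window_size = int(
            DEFAULT_CONTEXT_WINDOW_SIZE * CONTEXT_WINDOW_USAGE_RATIO
        )
        for key, value in LLM_CONTEXT_WINDOW_SIZES.items():
            if self.model.startswith(key):
                # No break: the last matching key wins, so longer, more
                # specific keys override their shorter prefixes.
                self.context_window_size = int(value * CONTEXT_WINDOW_USAGE_RATIO)
        return self.context_window_size


print(MiniLLM("gemini-1.5-flash-8b-latest").get_context_window_size())  # 786432
print(MiniLLM("gemini-1.5-pro-002").get_context_window_size())          # 1572864
print(MiniLLM("some-unknown-model").get_context_window_size())          # 6144
```

Because the loop does not `break`, the last matching key wins; `gemini-1.5-flash-8b-latest` matches both `gemini-1.5-flash` and `gemini-1.5-flash-8b`, and insertion order lets the more specific entry take effect. The computed size is cached on `self.context_window_size`, so later calls skip the scan entirely.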