Compare commits

..

4 Commits

Author SHA1 Message Date
Devin AI
92dd7feec2 refactor: Improve Mistral LLM implementation based on feedback
- Add MISTRAL_IDENTIFIERS constant
- Use deepcopy for message copying
- Add type annotations
- Improve test organization and add edge cases
- Add error handling and logging

Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-21 18:28:19 +00:00
Devin AI
be5b448a8a test: Add VCR cassette for Mistral role handling test
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-21 18:20:28 +00:00
Devin AI
adfdbe55cf fix: Handle Mistral LLM role requirements for tools
- Modify role handling in LLM class for Mistral models
- Add tests for Mistral role handling with tools
- Fixes #2194

Co-Authored-By: Joe Moura <joao@crewai.com>
2025-02-21 18:19:47 +00:00
Jannik Maierhöfer
b50772a38b docs: add header image to langfuse guide (#2128)
Co-authored-by: Brandon Hancock (bhancock_ai) <109994880+bhancockio@users.noreply.github.com>
2025-02-21 10:11:55 -05:00
6 changed files with 189 additions and 106 deletions

View File

@@ -10,6 +10,8 @@ This notebook demonstrates how to integrate **Langfuse** with **CrewAI** using O
> **What is Langfuse?** [Langfuse](https://langfuse.com) is an open-source LLM engineering platform. It provides tracing and monitoring capabilities for LLM applications, helping developers debug, analyze, and optimize their AI systems. Langfuse integrates with various tools and frameworks via native integrations, OpenTelemetry, and APIs/SDKs.
[![Langfuse Overview Video](https://github.com/user-attachments/assets/3926b288-ff61-4b95-8aa1-45d041c70866)](https://langfuse.com/watch-demo)
## Get Started
We'll walk through a simple example of using CrewAI and integrating it with Langfuse via OpenTelemetry using OpenLit.

View File

@@ -21,6 +21,8 @@ from typing import (
from dotenv import load_dotenv
from pydantic import BaseModel
logger = logging.getLogger(__name__)
from crewai.utilities.events.tool_usage_events import ToolExecutionErrorEvent
with warnings.catch_warnings():
@@ -133,6 +135,9 @@ def suppress_warnings():
class LLM:
# Constants for model identification
MISTRAL_IDENTIFIERS = {'mistral', 'mixtral'}
def __init__(
self,
model: str,
@@ -392,9 +397,11 @@ class LLM:
Returns:
List of formatted messages according to provider requirements.
For Anthropic models, ensures first message has 'user' role.
For Mistral models, converts 'assistant' roles to 'user' roles.
Raises:
TypeError: If messages is None or contains invalid message format.
Exception: If message formatting fails for any provider-specific reason.
"""
if messages is None:
raise TypeError("Messages cannot be None")
@@ -406,6 +413,19 @@ class LLM:
"Invalid message format. Each message must be a dict with 'role' and 'content' keys"
)
# Handle Mistral role requirements
if any(identifier in self.model.lower() for identifier in self.MISTRAL_IDENTIFIERS):
try:
from copy import deepcopy
messages_copy = deepcopy(messages)
for message in messages_copy:
if message.get("role") == "assistant":
message["role"] = "user"
return messages_copy
except Exception as e:
logger.error(f"Error formatting messages for Mistral: {str(e)}")
raise
if not self.is_anthropic:
return messages

View File

@@ -1,7 +1,5 @@
import json
import logging
import re
from functools import lru_cache
from typing import Any, Optional, Type, Union, get_args, get_origin
from pydantic import BaseModel, ValidationError
@@ -10,8 +8,6 @@ from crewai.agents.agent_builder.utilities.base_output_converter import OutputCo
from crewai.utilities.printer import Printer
from crewai.utilities.pydantic_schema_parser import PydanticSchemaParser
logger = logging.getLogger(__name__)
class ConverterError(Exception):
"""Error raised when Converter fails to parse the input."""
@@ -257,57 +253,17 @@ def create_converter(
return converter
FIELD_TYPE_KEY = "type"
FIELD_DESC_KEY = "description"
def generate_model_description(model: Type[BaseModel]) -> str:
"""
Generate a string description of a Pydantic model's fields and their types.
This function takes a Pydantic model class and returns a string that describes
the model's fields and their respective types. The description includes handling
@lru_cache(maxsize=100)
def generate_model_description(model: Type[BaseModel]) -> str:
models and field descriptions when available.
Args:
model: A Pydantic BaseModel class to generate description for
Returns:
str: A JSON-like string describing the model's fields, their types, and descriptions
of complex types such as `Optional`, `List`, and `Dict`, as well as nested Pydantic
models.
"""
def describe_field(field_type: Any, field_info: Optional[Any] = None) -> Union[str, dict]:
"""
Generate a description for a model field including its type and description.
Args:
field_type: The type annotation of the field
field_info: Optional field information containing description
Returns:
Union[str, dict]: Field description either as string (type only) or
dict with type and description
"""
try:
type_desc = get_type_description(field_type)
if field_info and field_info.description:
return {FIELD_TYPE_KEY: type_desc, FIELD_DESC_KEY: field_info.description}
return type_desc
except Exception as e:
logger.warning(f"Error processing field description: {e}")
return str(field_type)
def get_type_description(field_type: Any) -> str:
"""
Get the type description for a field type.
Args:
field_type: The type annotation to describe
Returns:
str: A string representation of the type
"""
def describe_field(field_type):
origin = get_origin(field_type)
args = get_args(field_type)
@@ -315,14 +271,14 @@ def generate_model_description(model: Type[BaseModel]) -> str:
# Handle both Union and the new '|' syntax
non_none_args = [arg for arg in args if arg is not type(None)]
if len(non_none_args) == 1:
return f"Optional[{get_type_description(non_none_args[0])}]"
return f"Optional[{describe_field(non_none_args[0])}]"
else:
return f"Optional[Union[{', '.join(get_type_description(arg) for arg in non_none_args)}]]"
return f"Optional[Union[{', '.join(describe_field(arg) for arg in non_none_args)}]]"
elif origin is list:
return f"List[{get_type_description(args[0])}]"
return f"List[{describe_field(args[0])}]"
elif origin is dict:
key_type = get_type_description(args[0])
value_type = get_type_description(args[1])
key_type = describe_field(args[0])
value_type = describe_field(args[1])
return f"Dict[{key_type}, {value_type}]"
elif isinstance(field_type, type) and issubclass(field_type, BaseModel):
return generate_model_description(field_type)
@@ -331,12 +287,8 @@ def generate_model_description(model: Type[BaseModel]) -> str:
else:
return str(field_type)
fields = model.model_fields
field_descriptions = []
for name, field in fields.items():
field_desc = describe_field(field.annotation, field)
if isinstance(field_desc, dict):
field_descriptions.append(f'"{name}": {json.dumps(field_desc)}')
else:
field_descriptions.append(f'"{name}": {field_desc}')
fields = model.__annotations__
field_descriptions = [
f'"{name}": {describe_field(type_)}' for name, type_ in fields.items()
]
return "{\n " + ",\n ".join(field_descriptions) + "\n}"

View File

@@ -0,0 +1,76 @@
interactions:
- request:
body: '{"messages": [{"role": "user", "content": "Use the dummy tool with param
''test''"}], "model": "mistral-large-latest", "stop": [], "tools": [{"type":
"function", "function": {"name": "dummy_tool", "description": "A simple test
tool.", "parameters": {"type": "object", "properties": {"param": {"type": "string",
"description": "A test parameter"}}, "required": ["param"]}}}]}'
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
connection:
- keep-alive
content-length:
- '372'
content-type:
- application/json
host:
- api.mistral.ai
user-agent:
- OpenAI/Python 1.61.0
x-stainless-arch:
- x64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- Linux
x-stainless-package-version:
- 1.61.0
x-stainless-raw-response:
- 'true'
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.7
method: POST
uri: https://api.mistral.ai/v1/chat/completions
response:
content: "{\n \"message\":\"Unauthorized\",\n \"request_id\":\"96ca5615d43f134988d0fc4b1ded1455\"\n}"
headers:
CF-RAY:
- 9158bb5adad376f1-SEA
Connection:
- keep-alive
Content-Length:
- '81'
Content-Type:
- application/json; charset=utf-8
Date:
- Fri, 21 Feb 2025 18:17:12 GMT
Server:
- cloudflare
Set-Cookie:
- __cf_bm=MGDKyTo6P8HCsRCn9L6BcLQuWlHhR_Oyx0OAG2lNook-1740161832-1.0.1.1-4TQjjEAQkY4UdlzBET20v1w7G87AU38G8amFRICHPql3I0aHI5pV3Bez0qKp6f3cBT351xkaHyInoOA6FeoJqQ;
path=/; expires=Fri, 21-Feb-25 18:47:12 GMT; domain=.mistral.ai; HttpOnly;
Secure; SameSite=None
access-control-allow-origin:
- '*'
alt-svc:
- h3=":443"; ma=86400
cf-cache-status:
- DYNAMIC
www-authenticate:
- Key
x-kong-request-id:
- 96ca5615d43f134988d0fc4b1ded1455
x-kong-response-latency:
- '0'
http_version: HTTP/1.1
status_code: 401
version: 1

View File

@@ -13,6 +13,84 @@ from crewai.utilities.token_counter_callback import TokenCalcHandler
# TODO: This test fails without print statement, which makes me think that something is happening asynchronously that we need to eventually fix and dive deeper into at a later date
@pytest.mark.vcr(filter_headers=["authorization"])
@pytest.mark.mistral
class TestMistralLLM:
    """Tests covering Mistral-specific message-role normalization."""

    @pytest.fixture
    def mistral_llm(self):
        """Provide a fresh Mistral-backed LLM instance for each test."""
        return LLM(model="mistral/mistral-large-latest")

    def test_mistral_role_handling(self, mistral_llm):
        """
        Assistant roles are rewritten to 'user', system roles survive,
        and the caller's message list is left untouched.
        """
        original = [
            {"role": "system", "content": "System message"},
            {"role": "user", "content": "Test message"},
            {"role": "assistant", "content": "Assistant response"},
        ]
        formatted = mistral_llm._format_messages_for_provider(original)

        # The assistant turn must come back with a 'user' role.
        converted = [m for m in formatted if m["content"] == "Assistant response"]
        assert any(m["role"] == "user" for m in converted)
        roles_after = [m["role"] for m in formatted]
        assert "assistant" not in roles_after
        assert "system" in roles_after
        # Formatting must not mutate the input list in place.
        assert "assistant" in [m["role"] for m in original]

    def test_mistral_empty_messages(self, mistral_llm):
        """An empty conversation passes through unchanged."""
        assert mistral_llm._format_messages_for_provider([]) == []

    def test_mistral_multiple_assistant_messages(self, mistral_llm):
        """Every assistant message in a run is converted; the input is preserved."""
        original = [
            {"role": "user", "content": "User 1"},
            {"role": "assistant", "content": "Assistant 1"},
            {"role": "assistant", "content": "Assistant 2"},
            {"role": "user", "content": "User 2"},
        ]
        formatted = mistral_llm._format_messages_for_provider(original)

        former_assistants = [
            m for m in formatted if m["content"] in ("Assistant 1", "Assistant 2")
        ]
        assert all(m["role"] == "user" for m in former_assistants)
        # Original list must still hold both assistant turns.
        assert sum(1 for m in original if m["role"] == "assistant") == 2
def test_mistral_role_handling():
    """Test that Mistral LLM correctly handles role requirements."""
    # NOTE(review): this duplicates TestMistralLLM.test_mistral_role_handling;
    # consider consolidating into the class-based suite.
    llm = LLM(model="mistral/mistral-large-latest")
    conversation = [
        {"role": "system", "content": "System message"},
        {"role": "user", "content": "User message"},
        {"role": "assistant", "content": "Assistant message"},
    ]
    formatted = llm._format_messages_for_provider(conversation)

    # The assistant message must have been re-labelled as a user message.
    assistant_turned_user = [
        m for m in formatted
        if m["content"] == "Assistant message" and m["role"] == "user"
    ]
    assert assistant_turned_user
    assert all(m["role"] != "assistant" for m in formatted)
    # The input list itself must keep its original roles.
    assert any(m["role"] == "assistant" for m in conversation)
@pytest.mark.vcr(filter_headers=["authorization"])
def test_llm_callback_replacement():
llm1 = LLM(model="gpt-4o-mini")

View File

@@ -4,7 +4,7 @@ from typing import Dict, List, Optional
from unittest.mock import MagicMock, Mock, patch
import pytest
from pydantic import BaseModel, Field
from pydantic import BaseModel
from crewai.llm import LLM
from crewai.utilities.converter import (
@@ -328,51 +328,6 @@ def test_generate_model_description_dict_field():
assert description == expected_description
@pytest.mark.field_descriptions
def test_generate_model_description_with_field_descriptions():
"""
Verify that the model description generator correctly includes field descriptions
when they are provided via Field(..., description='...').
"""
class ModelWithDescriptions(BaseModel):
name: str = Field(..., description="The user's full name")
age: int = Field(..., description="The user's age in years")
description = generate_model_description(ModelWithDescriptions)
expected = '{\n "name": {"type": "str", "description": "The user\'s full name"},\n "age": {"type": "int", "description": "The user\'s age in years"}\n}'
assert description == expected
@pytest.mark.field_descriptions
def test_generate_model_description_mixed_fields():
"""
Verify that the model description generator correctly handles a mix of fields
with and without descriptions.
"""
class MixedModel(BaseModel):
name: str = Field(..., description="The user's name")
age: int # No description
description = generate_model_description(MixedModel)
expected = '{\n "name": {"type": "str", "description": "The user\'s name"},\n "age": int\n}'
assert description == expected
@pytest.mark.field_descriptions
def test_generate_model_description_with_empty_description():
"""
Verify that the model description generator correctly handles fields with empty
descriptions by treating them as fields without descriptions.
"""
class ModelWithEmptyDescription(BaseModel):
name: str = Field(..., description="")
age: int = Field(..., description=None)
description = generate_model_description(ModelWithEmptyDescription)
expected = '{\n "name": str,\n "age": int\n}'
assert description == expected
@pytest.mark.vcr(filter_headers=["authorization"])
def test_convert_with_instructions():
llm = LLM(model="gpt-4o-mini")