diff --git a/docs/concepts/llms.mdx b/docs/concepts/llms.mdx
index 2712de77a..e17098f6a 100644
--- a/docs/concepts/llms.mdx
+++ b/docs/concepts/llms.mdx
@@ -270,7 +270,7 @@ In this section, you'll find detailed examples that help you select, configure,
| Claude 3.5 Haiku | Up to 200k tokens | Fast, compact multimodal model optimized for quick responses and seamless human-like interactions |
| Claude 3 Sonnet | Up to 200k tokens | Multimodal model balancing intelligence and speed for high-volume deployments. |
| Claude 3 Haiku | Up to 200k tokens | Compact, high-speed multimodal model optimized for quick responses and natural conversational interactions |
- | Claude 3 Opus | Up to 200k tokens | Most advanced multimodal model exceling at complex tasks with human-like reasoning and superior contextual understanding. |
+ | Claude 3 Opus | Up to 200k tokens | Most advanced multimodal model excelling at complex tasks with human-like reasoning and superior contextual understanding. |
| Claude 2.1 | Up to 200k tokens | Enhanced version with expanded context window, improved reliability, and reduced hallucinations for long-form and RAG applications |
| Claude | Up to 100k tokens | Versatile model excelling in sophisticated dialogue, creative content, and precise instruction following. |
| Claude Instant | Up to 100k tokens | Fast, cost-effective model for everyday tasks like dialogue, analysis, summarization, and document Q&A |
@@ -406,6 +406,62 @@ In this section, you'll find detailed examples that help you select, configure,
| baichuan-inc/baichuan2-13b-chat | 4,096 tokens | Support Chinese and English chat, coding, math, instruction following, solving quizzes |
+
+
+ NVIDIA NIM enables you to run powerful LLMs locally on your Windows machine using WSL2 (Windows Subsystem for Linux).
+ This approach uses your NVIDIA GPU for private, secure, and cost-effective AI inference without relying on cloud services,
+ making it well suited for development, testing, and production scenarios where data privacy or offline capability is required.
+
+ Here is a step-by-step guide to setting up a local NVIDIA NIM model:
+
+ 1. Follow the installation instructions from the [NVIDIA Website](https://docs.nvidia.com/nim/wsl2/latest/getting-started.html)
+
+ 2. Install the model locally. For Llama 3.1-8b, follow these [instructions](https://build.nvidia.com/meta/llama-3_1-8b-instruct/deploy)
+
+ 3. Configure the local model in your CrewAI project:
+
+ ```python Code
+ from crewai import Agent
+ from crewai.llm import LLM
+ from crewai.project import CrewBase, agent
+
+ local_nvidia_nim_llm = LLM(
+     model="openai/meta/llama-3.1-8b-instruct",  # "openai/" prefix: NIM serves an OpenAI-compatible API
+     base_url="http://localhost:8000/v1",
+     api_key="not-needed",  # an api_key is required, but any placeholder text works for a local server
+ )
+
+ # Then you can use it in your crew:
+
+ @CrewBase
+ class MyCrew:
+     # ...
+
+     @agent
+     def researcher(self) -> Agent:
+         return Agent(
+             config=self.agents_config['researcher'],
+             llm=local_nvidia_nim_llm,
+         )
+
+     # ...
+ ```
+
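+ As a quick sanity check, you can query the server's OpenAI-compatible REST API directly before wiring the model into a crew.
+ The sketch below assumes the NIM server is listening on `http://localhost:8000` as configured above:
+
+ ```python Code
+ import requests
+
+ # List the models the local NIM server is currently serving via the
+ # OpenAI-compatible /v1/models route.
+ response = requests.get("http://localhost:8000/v1/models", timeout=10)
+ response.raise_for_status()
+
+ for model in response.json().get("data", []):
+     print(model["id"])  # e.g. "meta/llama-3.1-8b-instruct"
+ ```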
+
Set the following environment variables in your `.env` file:
diff --git a/src/crewai/agents/parser.py b/src/crewai/agents/parser.py
index 1bda4df5c..05c5bc003 100644
--- a/src/crewai/agents/parser.py
+++ b/src/crewai/agents/parser.py
@@ -124,9 +124,11 @@ class CrewAgentParser:
)
 
def _extract_thought(self, text: str) -> str:
- thought_index = text.find("\n\nAction")
+ thought_index = text.find("\nAction")
if thought_index == -1:
- thought_index = text.find("\n\nFinal Answer")
+ thought_index = text.find("\nFinal Answer")
if thought_index == -1:
return ""
thought = text[:thought_index].strip()