Compare commits


1 commit

Author: Devin AI
SHA: e0ee53b1b6
Message: Fix: Load .env before importing crew module in chat command
This fixes issue #3934 where 'crewai chat' fails when the crew has
module-level LLM instantiation that requires OPENAI_API_KEY.

The issue occurred because 'crewai chat' imports the crew module directly
using __import__(), and any module-level code executes immediately during
import. At this point, the .env file hadn't been loaded yet, so
OPENAI_API_KEY was not available in the environment.

Changes:
- Load .env file in load_crew_and_name() before importing the crew module
- Use os.environ.setdefault() to avoid overriding existing env vars
- Add comprehensive tests covering:
  - Loading crew with .env containing OPENAI_API_KEY
  - Environment variable precedence (existing vars not overridden)
  - Loading crew without .env file

Fixes #3934

Co-Authored-By: João <joao@crewai.com>
Date: 2025-11-17 22:18:23 +00:00
8 changed files with 238 additions and 179 deletions
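
For context, the essential technique in this commit is an ordering change: parse the project's .env and seed os.environ before __import__() executes any module-level code in the crew module. A minimal sketch of the pattern follows; the dotenv_values-based parser is an assumption about what load_env_vars does, not the CLI's actual implementation — only the setdefault precedence rule comes from the diff below.

# Sketch: load .env, then import (illustrative; load_env_vars internals are
# an assumption -- python-dotenv's dotenv_values is one way to parse the file).
import os
from pathlib import Path

from dotenv import dotenv_values


def load_env_vars(cwd: Path) -> dict[str, str]:
    """Parse <cwd>/.env into a plain dict without mutating os.environ."""
    env_path = cwd / ".env"
    if not env_path.exists():
        return {}
    return {k: v for k, v in dotenv_values(env_path).items() if v is not None}


def import_crew_module(module_name: str):
    # Seed the environment FIRST: module-level statements such as
    # default_llm = LLM(model="openai/gpt-4") run at import time and may
    # read OPENAI_API_KEY immediately.
    for key, value in load_env_vars(Path.cwd()).items():
        os.environ.setdefault(key, value)  # never override shell-exported vars
    return __import__(module_name)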

View File

@@ -14,22 +14,6 @@ from crewai_tools.tools.rag.rag_tool import Adapter
def _default_embedding_function():
    """Create a default embedding function using OpenAI's text-embedding-ada-002 model.

    This function creates and returns an embedding function that uses OpenAI's API
    to generate embeddings for text inputs. The embedding function is used by the
    LanceDBAdapter to convert text queries into vector representations for similarity search.

    Returns:
        Callable: A function that takes a list of strings and returns their embeddings
        as a list of vectors.

    Example:
        >>> embed_fn = _default_embedding_function()
        >>> embeddings = embed_fn(["Hello world"])
        >>> len(embeddings[0])  # Vector dimension
        1536
    """
    client = OpenAIClient()

    def _embedding_function(input):
@@ -40,32 +24,6 @@ def _default_embedding_function():
class LanceDBAdapter(Adapter):
    """Adapter for integrating LanceDB vector database with CrewAI RAG tools.

    LanceDBAdapter provides a bridge between CrewAI's RAG (Retrieval-Augmented Generation)
    system and LanceDB, enabling efficient vector similarity search for knowledge retrieval.
    It handles embedding generation, vector search, and data ingestion with precise control
    over query parameters and column mappings.

    Attributes:
        uri: Database connection URI or path to the LanceDB database.
        table_name: Name of the table to query within the LanceDB database.
        embedding_function: Function to convert text into embeddings. Defaults to OpenAI's
            text-embedding-ada-002 model.
        top_k: Number of top results to return from similarity search. Defaults to 3.
        vector_column_name: Name of the column containing vector embeddings. Defaults to "vector".
        text_column_name: Name of the column containing text content. Defaults to "text".

    Example:
        >>> from crewai_tools.adapters.lancedb_adapter import LanceDBAdapter
        >>> adapter = LanceDBAdapter(
        ...     uri="./my_lancedb",
        ...     table_name="documents",
        ...     top_k=5
        ... )
        >>> results = adapter.query("What is machine learning?")
        >>> print(results)
    """

    uri: str | Path
    table_name: str
    embedding_function: Callable = Field(default_factory=_default_embedding_function)
@@ -77,44 +35,12 @@ class LanceDBAdapter(Adapter):
    _table: LanceDBTable = PrivateAttr()

    def model_post_init(self, __context: Any) -> None:
        """Initialize the database connection and table after model instantiation.

        This method is automatically called after the Pydantic model is initialized.
        It establishes the connection to the LanceDB database and opens the specified
        table for querying and data operations.

        Args:
            __context: Pydantic context object passed during initialization.

        Raises:
            Exception: If the database connection fails or the table does not exist.
        """
        self._db = lancedb_connect(self.uri)
        self._table = self._db.open_table(self.table_name)
        super().model_post_init(__context)

    def query(self, question: str) -> str:  # type: ignore[override]
        """Perform a vector similarity search for the given question.

        This method converts the input question into an embedding vector and searches
        the LanceDB table for the most similar entries. It returns the top-k results
        based on vector similarity, providing precise retrieval for RAG applications.

        Args:
            question: The text query to search for in the vector database.

        Returns:
            A string containing the concatenated text results from the top-k most
            similar entries, separated by newlines.

        Example:
            >>> adapter = LanceDBAdapter(uri="./db", table_name="docs")
            >>> results = adapter.query("What is CrewAI?")
            >>> print(results)
            CrewAI is a framework for orchestrating AI agents...
            CrewAI provides precise control over agent workflows...
        """
        query = self.embedding_function([question])[0]
        results = (
            self._table.search(query, vector_column_name=self.vector_column_name)
@@ -130,23 +56,4 @@ class LanceDBAdapter(Adapter):
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Add data to the LanceDB table.

        This method provides a direct interface to add new records to the underlying
        LanceDB table. It accepts the same arguments as the LanceDB table's add method,
        allowing flexible data ingestion for building knowledge bases.

        Args:
            *args: Positional arguments to pass to the LanceDB table's add method.
            **kwargs: Keyword arguments to pass to the LanceDB table's add method.
                Common kwargs include 'data' (list of records) and 'mode' (append/overwrite).

        Example:
            >>> adapter = LanceDBAdapter(uri="./db", table_name="docs")
            >>> data = [
            ...     {"text": "CrewAI enables agent collaboration", "vector": [0.1, 0.2, ...]},
            ...     {"text": "LanceDB provides vector storage", "vector": [0.3, 0.4, ...]}
            ... ]
            >>> adapter.add(data=data)
        """
        self._table.add(*args, **kwargs)

View File

@@ -1,62 +0,0 @@
"""Test that LanceDB adapter has proper docstrings."""
import inspect
import pytest
lancedb = pytest.importorskip("lancedb")
from crewai_tools.adapters.lancedb_adapter import (
LanceDBAdapter,
_default_embedding_function,
)
def test_lancedb_adapter_class_has_docstring():
"""Verify that LanceDBAdapter class has a docstring."""
assert LanceDBAdapter.__doc__ is not None, "LanceDBAdapter class is missing a docstring"
assert len(LanceDBAdapter.__doc__.strip()) > 0, "LanceDBAdapter docstring is empty"
def test_lancedb_adapter_model_post_init_has_docstring():
"""Verify that model_post_init method has a docstring."""
assert (
LanceDBAdapter.model_post_init.__doc__ is not None
), "model_post_init method is missing a docstring"
assert (
len(LanceDBAdapter.model_post_init.__doc__.strip()) > 0
), "model_post_init docstring is empty"
def test_lancedb_adapter_query_has_docstring():
"""Verify that query method has a docstring."""
assert LanceDBAdapter.query.__doc__ is not None, "query method is missing a docstring"
assert len(LanceDBAdapter.query.__doc__.strip()) > 0, "query docstring is empty"
def test_lancedb_adapter_add_has_docstring():
"""Verify that add method has a docstring."""
assert LanceDBAdapter.add.__doc__ is not None, "add method is missing a docstring"
assert len(LanceDBAdapter.add.__doc__.strip()) > 0, "add docstring is empty"
def test_default_embedding_function_has_docstring():
"""Verify that _default_embedding_function has a docstring."""
assert (
_default_embedding_function.__doc__ is not None
), "_default_embedding_function is missing a docstring"
assert (
len(_default_embedding_function.__doc__.strip()) > 0
), "_default_embedding_function docstring is empty"
def test_docstrings_contain_required_sections():
"""Verify that docstrings contain Args, Returns, or Example sections where appropriate."""
query_doc = LanceDBAdapter.query.__doc__
assert query_doc is not None
assert "Args:" in query_doc or "Parameters:" in query_doc, "query docstring should have Args/Parameters section"
assert "Returns:" in query_doc, "query docstring should have Returns section"
add_doc = LanceDBAdapter.add.__doc__
assert add_doc is not None
assert "Args:" in add_doc or "Parameters:" in add_doc, "add docstring should have Args/Parameters section"

View File

@@ -1,4 +1,5 @@
import json
import os
from pathlib import Path
import platform
import re
@@ -11,7 +12,7 @@ import click
from packaging import version
import tomli
from crewai.cli.utils import read_toml
from crewai.cli.utils import load_env_vars, read_toml
from crewai.cli.version import get_crewai_version
from crewai.crew import Crew
from crewai.llm import LLM, BaseLLM
@@ -328,6 +329,11 @@ def load_crew_and_name() -> tuple[Crew, str]:
    # Get the current working directory
    cwd = Path.cwd()

    # Load environment variables from .env file before importing the crew module
    env_vars = load_env_vars(cwd)
    for key, value in env_vars.items():
        os.environ.setdefault(key, value)

    # Path to the pyproject.toml file
    pyproject_path = cwd / "pyproject.toml"
    if not pyproject_path.exists():
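
The setdefault call above is what gives already-exported shell variables precedence over .env values: it only writes a key that is absent. A quick standalone illustration of that behavior:

import os

os.environ["OPENAI_API_KEY"] = "from-shell"
os.environ.setdefault("OPENAI_API_KEY", "from-dotenv")
assert os.environ["OPENAI_API_KEY"] == "from-shell"  # existing value kept

os.environ.pop("MODEL", None)
os.environ.setdefault("MODEL", "gpt-4")
assert os.environ["MODEL"] == "gpt-4"  # missing key filled in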

View File

@@ -0,0 +1,212 @@
"""Tests for crew_chat.py environment variable loading."""
import os
from unittest.mock import Mock, patch
import pytest
from crewai.cli.crew_chat import load_crew_and_name
@pytest.fixture
def temp_crew_project(tmp_path):
"""Create a temporary crew project with .env file."""
project_dir = tmp_path / "test_crew"
project_dir.mkdir()
src_dir = project_dir / "src" / "test_crew"
src_dir.mkdir(parents=True)
env_file = project_dir / ".env"
env_file.write_text("OPENAI_API_KEY=test-api-key-from-env\nMODEL=gpt-4\n")
pyproject = project_dir / "pyproject.toml"
pyproject.write_text("""[project]
name = "test_crew"
version = "0.1.0"
description = "Test crew"
requires-python = ">=3.10"
dependencies = ["crewai"]
[tool.crewai]
type = "crew"
""")
(src_dir / "__init__.py").write_text("")
crew_py = src_dir / "crew.py"
crew_py.write_text("""from crewai import Agent, Crew, Process, Task, LLM
from crewai.project import CrewBase, agent, crew, task
default_llm = LLM(model="openai/gpt-4")
@CrewBase
class TestCrew:
'''Test crew'''
@agent
def researcher(self) -> Agent:
return Agent(
role="Researcher",
goal="Research topics",
backstory="You are a researcher",
llm=default_llm,
)
@task
def research_task(self) -> Task:
return Task(
description="Research {topic}",
expected_output="A report",
agent=self.researcher(),
)
@crew
def crew(self) -> Crew:
return Crew(
agents=[self.researcher()],
tasks=[self.research_task()],
process=Process.sequential,
verbose=True,
)
""")
config_dir = src_dir / "config"
config_dir.mkdir()
agents_yaml = config_dir / "agents.yaml"
agents_yaml.write_text("""researcher:
role: Researcher
goal: Research topics
backstory: You are a researcher
""")
tasks_yaml = config_dir / "tasks.yaml"
tasks_yaml.write_text("""research_task:
description: Research {topic}
expected_output: A report
agent: researcher
""")
return project_dir
def test_load_crew_with_env_file(temp_crew_project, monkeypatch):
    """Test that load_crew_and_name loads .env before importing crew module."""
    monkeypatch.chdir(temp_crew_project)
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)

    with patch("crewai.llm.LLM") as mock_llm:
        mock_llm.return_value = Mock()
        crew_instance, crew_name = load_crew_and_name()

    assert crew_instance is not None
    assert crew_name == "TestCrew"
    assert os.environ.get("OPENAI_API_KEY") == "test-api-key-from-env"
    assert os.environ.get("MODEL") == "gpt-4"


def test_env_var_precedence(temp_crew_project, monkeypatch):
    """Test that existing environment variables are not overridden by .env."""
    monkeypatch.chdir(temp_crew_project)
    existing_key = "existing-api-key-from-shell"
    monkeypatch.setenv("OPENAI_API_KEY", existing_key)

    with patch("crewai.llm.LLM") as mock_llm:
        mock_llm.return_value = Mock()
        crew_instance, crew_name = load_crew_and_name()

    assert crew_instance is not None
    assert crew_name == "TestCrew"
    assert os.environ.get("OPENAI_API_KEY") == existing_key
    assert os.environ.get("MODEL") == "gpt-4"
def test_load_crew_without_env_file(tmp_path, monkeypatch):
    """Test that load_crew_and_name works even without .env file."""
    project_dir = tmp_path / "test_crew_no_env"
    project_dir.mkdir()
    src_dir = project_dir / "src" / "test_crew_no_env"
    src_dir.mkdir(parents=True)

    pyproject = project_dir / "pyproject.toml"
    pyproject.write_text("""[project]
name = "test_crew_no_env"
version = "0.1.0"
description = "Test crew without env"
requires-python = ">=3.10"
dependencies = ["crewai"]

[tool.crewai]
type = "crew"
""")

    (src_dir / "__init__.py").write_text("")

    crew_py = src_dir / "crew.py"
    crew_py.write_text("""from crewai import Agent, Crew, Process, Task
from crewai.project import CrewBase, agent, crew, task


@CrewBase
class TestCrewNoEnv:
    '''Test crew without env'''

    @agent
    def researcher(self) -> Agent:
        return Agent(
            role="Researcher",
            goal="Research topics",
            backstory="You are a researcher",
        )

    @task
    def research_task(self) -> Task:
        return Task(
            description="Research {topic}",
            expected_output="A report",
            agent=self.researcher(),
        )

    @crew
    def crew(self) -> Crew:
        return Crew(
            agents=[self.researcher()],
            tasks=[self.research_task()],
            process=Process.sequential,
            verbose=True,
        )
""")

    config_dir = src_dir / "config"
    config_dir.mkdir()
    agents_yaml = config_dir / "agents.yaml"
    agents_yaml.write_text("""researcher:
  role: Researcher
  goal: Research topics
  backstory: You are a researcher
""")
    tasks_yaml = config_dir / "tasks.yaml"
    tasks_yaml.write_text("""research_task:
  description: Research {topic}
  expected_output: A report
  agent: researcher
""")

    monkeypatch.chdir(project_dir)
    monkeypatch.setenv("OPENAI_API_KEY", "test-key")

    crew_instance, crew_name = load_crew_and_name()

    assert crew_instance is not None
    assert crew_name == "TestCrewNoEnv"

View File

@@ -13,7 +13,7 @@ load_result = load_dotenv(override=True)
@pytest.fixture(autouse=True)
def setup_test_environment():
    """Set up test environment with a temporary directory for SQLite storage."""
    with tempfile.TemporaryDirectory(ignore_cleanup_errors=True) as temp_dir:
    with tempfile.TemporaryDirectory() as temp_dir:
        # Create the directory with proper permissions
        storage_dir = Path(temp_dir) / "crewai_test_storage"
        storage_dir.mkdir(parents=True, exist_ok=True)

View File

@@ -144,8 +144,9 @@ class TestAgentEvaluator:
        mock_crew.tasks.append(task)

        events = {}
        results_condition = threading.Condition()
        results_ready = False
        started_event = threading.Event()
        completed_event = threading.Event()
        task_completed_event = threading.Event()

        agent_evaluator = AgentEvaluator(
            agents=[agent], evaluators=[GoalAlignmentEvaluator()]
@@ -155,11 +156,13 @@ class TestAgentEvaluator:
        async def capture_started(source, event):
            if event.agent_id == str(agent.id):
                events["started"] = event
                started_event.set()

        @crewai_event_bus.on(AgentEvaluationCompletedEvent)
        async def capture_completed(source, event):
            if event.agent_id == str(agent.id):
                events["completed"] = event
                completed_event.set()

        @crewai_event_bus.on(AgentEvaluationFailedEvent)
        def capture_failed(source, event):
@@ -167,20 +170,17 @@ class TestAgentEvaluator:
        @crewai_event_bus.on(TaskCompletedEvent)
        async def on_task_completed(source, event):
            nonlocal results_ready
            # TaskCompletedEvent fires AFTER evaluation results are stored
            if event.task and event.task.id == task.id:
                while not agent_evaluator.get_evaluation_results().get(agent.role):
                    pass
                with results_condition:
                    results_ready = True
                    results_condition.notify()
                task_completed_event.set()

        mock_crew.kickoff()

        with results_condition:
            assert results_condition.wait_for(
                lambda: results_ready, timeout=5
            ), "Timeout waiting for evaluation results"
        assert started_event.wait(timeout=5), "Timeout waiting for started event"
        assert completed_event.wait(timeout=5), "Timeout waiting for completed event"
        assert task_completed_event.wait(timeout=5), (
            "Timeout waiting for task completion"
        )

        assert events.keys() == {"started", "completed"}
        assert events["started"].agent_id == str(agent.id)

View File

@@ -647,7 +647,6 @@ def test_handle_streaming_tool_calls_no_tools(mock_emit):
@pytest.mark.vcr(filter_headers=["authorization"])
@pytest.mark.skip(reason="Highly flaky on ci")
def test_llm_call_when_stop_is_unsupported(caplog):
    llm = LLM(model="o1-mini", stop=["stop"], is_litellm=True)
    with caplog.at_level(logging.INFO):
@@ -658,7 +657,6 @@ def test_llm_call_when_stop_is_unsupported(caplog):
@pytest.mark.vcr(filter_headers=["authorization"])
@pytest.mark.skip(reason="Highly flaky on ci")
def test_llm_call_when_stop_is_unsupported_when_additional_drop_params_is_provided(
    caplog,
):
@@ -666,6 +664,7 @@ def test_llm_call_when_stop_is_unsupported_when_additional_drop_params_is_provid
model="o1-mini",
stop=["stop"],
additional_drop_params=["another_param"],
is_litellm=True,
)
with caplog.at_level(logging.INFO):
result = llm.call("What is the capital of France?")

View File

@@ -273,15 +273,12 @@ def another_simple_tool():
def test_internal_crew_with_mcp():
    from crewai_tools.adapters.tool_collection import ToolCollection
    from crewai_tools import MCPServerAdapter
    from crewai_tools.adapters.mcp_adapter import ToolCollection

    mock_adapter = Mock()
    mock_adapter.tools = ToolCollection([simple_tool, another_simple_tool])
    with (
        patch("crewai_tools.MCPServerAdapter", return_value=mock_adapter) as adapter_mock,
        patch("crewai.llm.LLM.__new__", return_value=Mock()),
    ):
    mock = Mock(spec=MCPServerAdapter)
    mock.tools = ToolCollection([simple_tool, another_simple_tool])
    with patch("crewai_tools.MCPServerAdapter", return_value=mock) as adapter_mock:
        crew = InternalCrewWithMCP()

    assert crew.reporting_analyst().tools == [simple_tool, another_simple_tool]
    assert crew.researcher().tools == [simple_tool]
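
One note on the Mock(spec=MCPServerAdapter) line: a spec'd mock restricts attribute reads to the real class's surface, so a typo raises AttributeError instead of silently returning a child mock. A small illustration with a hypothetical stand-in class:

from unittest.mock import Mock

class Adapter:  # hypothetical stand-in for MCPServerAdapter
    tools: list = []

mock = Mock(spec=Adapter)
mock.tools = ["a_tool"]  # fine: `tools` exists on the spec'd class

try:
    _ = mock.tols  # typo: not on the spec
except AttributeError:
    print("spec caught the bad attribute access")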