From fa52c1f918923ede64607a626e61275c13f99b4e Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 24 Mar 2025 08:05:56 +0000
Subject: [PATCH] Fix regex pattern in agent.py:set_knowledge to support Chinese characters (#2454)

Co-Authored-By: Joe Moura
---
Reviewer notes (text in this section is ignored by `git am`):
- Line breaks and indentation were restored from a whitespace-collapsed
  copy of this patch. The indentation of the context lines in the
  agent.py hunk is reconstructed -- confirm it against the target file
  before applying.
- test_chinese_agent.py now fails when Knowledge is never called with a
  `collection_name` keyword; previously the only assertion lived inside
  a conditional in a loop and could be skipped entirely.
- The blob id on the `index` line of test_chinese_agent.py was not
  recomputed after that change.
- The range \u4e00-\u9fa5 stops short of the end of the CJK Unified
  Ideographs block (U+9FFF); consider widening it in a follow-up.

 src/crewai/agent.py                           |  2 +-
 .../test_chinese_agent.py                     | 37 ++++++++++++++++++++
 .../test_chinese_agent_name.py                | 28 +++++++++++++++
 3 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_chinese_support/test_chinese_agent.py
 create mode 100644 tests/test_chinese_support/test_chinese_agent_name.py

diff --git a/src/crewai/agent.py b/src/crewai/agent.py
index d10b768d4..9a2f098ff 100644
--- a/src/crewai/agent.py
+++ b/src/crewai/agent.py
@@ -140,7 +140,7 @@ class Agent(BaseAgent):
                 self.embedder = crew_embedder
 
             if self.knowledge_sources:
-                full_pattern = re.compile(r"[^a-zA-Z0-9\-_\r\n]|(\.\.)")
+                full_pattern = re.compile(r"[^a-zA-Z0-9\u4e00-\u9fa5\-_\r\n]|(\.\.)")
                 knowledge_agent_name = f"{re.sub(full_pattern, '_', self.role)}"
                 if isinstance(self.knowledge_sources, list) and all(
                     isinstance(k, BaseKnowledgeSource) for k in self.knowledge_sources
diff --git a/tests/test_chinese_support/test_chinese_agent.py b/tests/test_chinese_support/test_chinese_agent.py
new file mode 100644
index 000000000..7a3b81f34
--- /dev/null
+++ b/tests/test_chinese_support/test_chinese_agent.py
@@ -0,0 +1,37 @@
+import re
+import pytest
+from unittest.mock import patch, MagicMock
+from crewai.agent import Agent
+from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
+
+def test_agent_with_chinese_role_name():
+    """Test that an agent with a Chinese role name works correctly with the updated regex pattern."""
+    # Create a knowledge source with some content
+    content = "This is some test content."
+    string_source = StringKnowledgeSource(content=content)
+
+    # Mock the Knowledge class to avoid actual initialization
+    with patch("crewai.agent.Knowledge") as MockKnowledge:
+        # Create an agent with a Chinese role name
+        agent = Agent(
+            role="中文角色",  # Chinese role name
+            goal="Test Chinese character support",
+            backstory="Testing Chinese character support in agent role names.",
+            knowledge_sources=[string_source],
+        )
+
+        # Call set_knowledge to trigger the regex pattern
+        agent.set_knowledge()
+
+        # Gather every collection_name keyword that was passed to Knowledge
+        collection_names = [
+            kwargs["collection_name"]
+            for _args, kwargs in MockKnowledge.call_args_list
+            if "collection_name" in kwargs
+        ]
+
+        # Require at least one such call; without this guard the check
+        # below would pass vacuously when Knowledge is never invoked.
+        assert collection_names, "Knowledge was not called with collection_name"
+        # The collection name should preserve the Chinese characters
+        assert all(name == "中文角色" for name in collection_names)
diff --git a/tests/test_chinese_support/test_chinese_agent_name.py b/tests/test_chinese_support/test_chinese_agent_name.py
new file mode 100644
index 000000000..85ab4075b
--- /dev/null
+++ b/tests/test_chinese_support/test_chinese_agent_name.py
@@ -0,0 +1,28 @@
+import pytest
+import re
+from crewai.agent import Agent
+from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
+
+def test_agent_with_chinese_role_name():
+    """Test that an agent with a Chinese role name works correctly."""
+    # Create a knowledge source with some content
+    content = "This is some test content."
+    string_source = StringKnowledgeSource(content=content)
+
+    # Create an agent with a Chinese role name
+    agent = Agent(
+        role="中文角色",  # Chinese role name
+        goal="Test Chinese character support",
+        backstory="Testing Chinese character support in agent role names.",
+        knowledge_sources=[string_source],
+    )
+
+    # Test that the regex pattern in agent.py correctly preserves Chinese characters
+    full_pattern = re.compile(r"[^a-zA-Z0-9\u4e00-\u9fa5\-_\r\n]|(\.\.)")
+    knowledge_agent_name = f"{re.sub(full_pattern, '_', agent.role)}"
+
+    # Verify that the agent was created successfully
+    assert agent.role == "中文角色"
+
+    # Verify that the Chinese characters are preserved in the knowledge_agent_name
+    assert knowledge_agent_name == "中文角色"