Compare commits

...

4 Commits

Author SHA1 Message Date
Devin AI
44d69124d7 Add parameterized tests for diverse Chinese character scenarios
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-03-24 08:09:54 +00:00
Devin AI
168e3c99d5 Enhance regex pattern with expanded CJK character support and fix import sorting
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-03-24 08:09:29 +00:00
Devin AI
b2661f0078 Fix import sorting in test files
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-03-24 08:07:49 +00:00
Devin AI
fa52c1f918 Fix regex pattern in agent.py:set_knowledge to support Chinese characters (#2454)
Co-Authored-By: Joe Moura <joao@crewai.com>
2025-03-24 08:05:56 +00:00
4 changed files with 122 additions and 1 deletion

View File

@@ -140,7 +140,10 @@ class Agent(BaseAgent):
self.embedder = crew_embedder
if self.knowledge_sources:
full_pattern = re.compile(r"[^a-zA-Z0-9\-_\r\n]|(\.\.)")
# Unicode ranges for CJK characters:
# \u4e00-\u9fff: Common Chinese characters
# \u3400-\u4dbf: Extended CJK characters
full_pattern = re.compile(r"[^\w\u4e00-\u9fff\u3400-\u4dbf\-_\r\n]|(\.\.)")
knowledge_agent_name = f"{re.sub(full_pattern, '_', self.role)}"
if isinstance(self.knowledge_sources, list) and all(
isinstance(k, BaseKnowledgeSource) for k in self.knowledge_sources

View File

@@ -0,0 +1,39 @@
import re
from unittest.mock import MagicMock, patch
import pytest
from crewai.agent import Agent
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
def test_agent_with_chinese_role_name():
    """Check that a Chinese role name is passed to ``Knowledge`` unchanged.

    ``Knowledge`` is patched where ``crewai.agent`` looks it up, so no real
    knowledge store is initialised. The test then inspects the
    ``collection_name`` keyword argument(s) the mock received and fails if
    none were recorded, so it cannot pass vacuously.
    """
    # Any non-empty content will do; only the role name is under test.
    string_source = StringKnowledgeSource(content="This is some test content.")

    with patch("crewai.agent.Knowledge") as MockKnowledge:
        agent = Agent(
            role="中文角色",  # Chinese role name
            goal="Test Chinese character support",
            backstory="Testing Chinese character support in agent role names.",
            knowledge_sources=[string_source],
        )
        # set_knowledge() is the code path that sanitises the role name with
        # the regex pattern before handing it to Knowledge.
        agent.set_knowledge()

    # Gather every collection_name keyword the mock was called with.
    collection_names = []
    for call in MockKnowledge.call_args_list:
        _args, kwargs = call
        if "collection_name" in kwargs:
            collection_names.append(kwargs["collection_name"])

    # Without this guard the value check below would be skipped silently
    # whenever Knowledge was not called (or was called without the keyword).
    assert collection_names, "Knowledge was never called with a collection_name"

    # The collection name should be exactly the Chinese role name.
    for collection_name in collection_names:
        assert collection_name == "中文角色"

View File

@@ -0,0 +1,31 @@
import re
import pytest
from crewai.agent import Agent
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
def test_agent_with_chinese_role_name():
    """Check that a Chinese role name survives role-name sanitisation.

    The agent is built with a Chinese role and a knowledge source, then the
    role is run through a local copy of the sanitising pattern; the Chinese
    characters must come out untouched.
    """
    # Any non-empty content will do; only the role name is under test.
    string_source = StringKnowledgeSource(content="This is some test content.")

    agent = Agent(
        role="中文角色",  # Chinese role name
        goal="Test Chinese character support",
        backstory="Testing Chinese character support in agent role names.",
        knowledge_sources=[string_source],
    )

    # NOTE(review): this is a copy of the pattern from agent.py:set_knowledge,
    # not the production object itself -- keep the two in sync.
    # Characters kept as-is (everything else becomes "_"):
    #   \w             Unicode word characters (for str patterns)
    #   \u4e00-\u9fff  CJK Unified Ideographs
    #   \u3400-\u4dbf  CJK Unified Ideographs Extension A
    #   - _ \r \n
    full_pattern = re.compile(r"[^\w\u4e00-\u9fff\u3400-\u4dbf\-_\r\n]|(\.\.)")
    knowledge_agent_name = full_pattern.sub("_", agent.role)

    # Verify that the agent was created successfully
    assert agent.role == "中文角色"
    # Verify that the Chinese characters are preserved in the sanitised name
    assert knowledge_agent_name == "中文角色"

View File

@@ -0,0 +1,48 @@
import re
import pytest
from crewai.agent import Agent
from crewai.knowledge.source.string_knowledge_source import StringKnowledgeSource
@pytest.mark.parametrize(
    "role_name,expected_output",
    [
        ("中文角色", "中文角色"),
        ("中文角色123", "中文角色123"),
        ("中文_角色", "中文_角色"),
        ("测试-角色", "测试-角色"),
        ("漢字データ", "漢字データ"),
        ("ABC中文123", "ABC中文123"),
        ("测试_Test-123", "测试_Test-123"),
        ("中文 Test Space", "中文_Test_Space"),
        ("中文角色@#$", "中文角色___"),
    ],
)
def test_mixed_character_support(role_name, expected_output):
    """Check role-name sanitisation across mixed CJK / ASCII / symbol inputs.

    Each case builds an agent with ``role_name`` and runs the role through a
    local copy of the sanitising pattern: word characters, CJK ideographs,
    ``-`` and ``_`` are kept, while spaces and symbols each become ``_``.
    """
    # The knowledge source content is irrelevant; only the role name matters.
    knowledge_source = StringKnowledgeSource(content="This is some test content.")

    agent = Agent(
        role=role_name,
        goal="Test mixed character support",
        backstory="Testing mixed character support in agent role names.",
        knowledge_sources=[knowledge_source],
    )

    # Local copy of the role-name pattern from agent.py.
    #   \u4e00-\u9fff  CJK Unified Ideographs
    #   \u3400-\u4dbf  CJK Unified Ideographs Extension A
    sanitiser = re.compile(r"[^\w\u4e00-\u9fff\u3400-\u4dbf\-_\r\n]|(\.\.)")
    sanitised_role = sanitiser.sub("_", agent.role)

    # The agent must keep the role exactly as given...
    assert agent.role == role_name
    # ...and the sanitised form must match the expectation for this case.
    assert sanitised_role == expected_output