mirror of
https://github.com/crewAIInc/crewAI.git
synced 2025-12-29 02:38:29 +00:00
Compare commits
4 Commits
lg-update-
...
devin/1744
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0e43a5f25e | ||
|
|
f4d2ad3004 | ||
|
|
8fe9099be0 | ||
|
|
8aec7b3364 |
@@ -37,6 +37,7 @@ dependencies = [
|
||||
"tomli>=2.0.2",
|
||||
"blinker>=1.9.0",
|
||||
"json5>=0.10.0",
|
||||
"unidecode>=1.3.8",
|
||||
]
|
||||
|
||||
[project.urls]
|
||||
|
||||
@@ -144,13 +144,15 @@ class Agent(BaseAgent):
|
||||
self.embedder = crew_embedder
|
||||
|
||||
if self.knowledge_sources:
|
||||
from crewai.utilities import sanitize_collection_name
|
||||
|
||||
if isinstance(self.knowledge_sources, list) and all(
|
||||
isinstance(k, BaseKnowledgeSource) for k in self.knowledge_sources
|
||||
):
|
||||
self.knowledge = Knowledge(
|
||||
sources=self.knowledge_sources,
|
||||
embedder=self.embedder,
|
||||
collection_name=self.role,
|
||||
collection_name=sanitize_collection_name(self.role),
|
||||
storage=self.knowledge_storage or None,
|
||||
)
|
||||
except (TypeError, ValueError) as e:
|
||||
|
||||
@@ -11,6 +11,7 @@ from .exceptions.context_window_exceeding_exception import (
|
||||
LLMContextLengthExceededException,
|
||||
)
|
||||
from .embedding_configurator import EmbeddingConfigurator
|
||||
from .string_utils import sanitize_collection_name
|
||||
|
||||
__all__ = [
|
||||
"Converter",
|
||||
@@ -25,4 +26,5 @@ __all__ = [
|
||||
"YamlParser",
|
||||
"LLMContextLengthExceededException",
|
||||
"EmbeddingConfigurator",
|
||||
"sanitize_collection_name",
|
||||
]
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import re
|
||||
from typing import Any, Dict, List, Optional, Union
|
||||
|
||||
from unidecode import unidecode
|
||||
|
||||
|
||||
def interpolate_only(
|
||||
input_string: Optional[str],
|
||||
@@ -80,3 +82,39 @@ def interpolate_only(
|
||||
result = result.replace(placeholder, value)
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def sanitize_collection_name(name: str) -> str:
    """
    Sanitizes a string to be used as a ChromaDB collection name.

    ChromaDB collection names must:
    1. Contain 3-63 characters
    2. Start and end with an alphanumeric character
    3. Otherwise contain only alphanumeric characters, underscores or hyphens (-)
    4. Contain no two consecutive periods (..)
    5. Not be a valid IPv4 address

    Periods are replaced with underscores by this function, so the result
    never contains a period and rules 4 and 5 cannot be violated.

    Args:
        name: The string to sanitize

    Returns:
        A sanitized string that can be used as a ChromaDB collection name.
        Names that end up shorter than 3 characters (including an empty
        input) are right-padded with 'x'.
    """
    min_length = 3
    max_length = 63

    # Convert the input with unidecode before filtering (ASCII transliteration
    # per the Unidecode docs).
    name = unidecode(name)

    # Replace every character outside [a-zA-Z0-9_-] with an underscore, then
    # collapse runs of underscores into a single one.
    name = re.sub(r"[^a-zA-Z0-9_\-]", "_", name)
    name = re.sub(r"_+", "_", name)

    # The name must start with an alphanumeric character.
    name = re.sub(r"^[^a-zA-Z0-9]+", "", name)

    # Truncate BEFORE trimming the tail: cutting at the maximum length can
    # expose a trailing '-' or '_' that then has to be stripped as well.
    name = name[:max_length]
    name = re.sub(r"[^a-zA-Z0-9]+$", "", name)

    # Pad last, so that neither truncation nor trimming can push the result
    # back under the minimum length (e.g. "a" followed by a long run of '-').
    return name.ljust(min_length, "x")
|
||||
|
||||
@@ -0,0 +1,80 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: '{"input": ["GraphQL is a query language for APIs and a runtime for fulfilling
|
||||
those queries with your existing data."], "model": "text-embedding-3-small",
|
||||
"encoding_format": "base64"}'
|
||||
headers:
|
||||
accept:
|
||||
- application/json
|
||||
accept-encoding:
|
||||
- gzip, deflate
|
||||
connection:
|
||||
- keep-alive
|
||||
content-length:
|
||||
- '183'
|
||||
content-type:
|
||||
- application/json
|
||||
host:
|
||||
- api.openai.com
|
||||
user-agent:
|
||||
- OpenAI/Python 1.68.2
|
||||
x-stainless-arch:
|
||||
- x64
|
||||
x-stainless-async:
|
||||
- 'false'
|
||||
x-stainless-lang:
|
||||
- python
|
||||
x-stainless-os:
|
||||
- Linux
|
||||
x-stainless-package-version:
|
||||
- 1.68.2
|
||||
x-stainless-read-timeout:
|
||||
- '600'
|
||||
x-stainless-retry-count:
|
||||
- '0'
|
||||
x-stainless-runtime:
|
||||
- CPython
|
||||
x-stainless-runtime-version:
|
||||
- 3.12.7
|
||||
method: POST
|
||||
uri: https://api.openai.com/v1/embeddings
|
||||
response:
|
||||
content: "{\n \"error\": {\n \"message\": \"Incorrect API key provided:
|
||||
sk-proj-********************************************************************************************************************************************************sLcA.
|
||||
You can find your API key at https://platform.openai.com/account/api-keys.\",\n
|
||||
\ \"type\": \"invalid_request_error\",\n \"param\": null,\n \"code\":
|
||||
\"invalid_api_key\"\n }\n}\n"
|
||||
headers:
|
||||
CF-RAY:
|
||||
- 92ce9b496bd476a0-SEA
|
||||
Connection:
|
||||
- keep-alive
|
||||
Content-Length:
|
||||
- '414'
|
||||
Content-Type:
|
||||
- application/json; charset=utf-8
|
||||
Date:
|
||||
- Tue, 08 Apr 2025 03:16:33 GMT
|
||||
Server:
|
||||
- cloudflare
|
||||
Set-Cookie:
|
||||
- __cf_bm=S0TpOceTcU79JlMhdky0iDf4sfHcnaP9JOimob0aF1M-1744082193-1.0.1.1-owOv6J9Tl7Kh5ltS4PUm_QuIO.Z98d8uzA4T19GK1BSWsRL6W6qUKRO1OPDRRtX6hYUcriLQUhl3drxTf_Ck7oq1ueg8sSI3NSvmKQX4xMY;
|
||||
path=/; expires=Tue, 08-Apr-25 03:46:33 GMT; domain=.api.openai.com; HttpOnly;
|
||||
Secure; SameSite=None
|
||||
- _cfuvid=033vbIHDWmO2i_U_qbo8bvpL4uiHSy5yuML2pM2GnKM-1744082193025-0.0.1.1-604800000;
|
||||
path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
|
||||
X-Content-Type-Options:
|
||||
- nosniff
|
||||
alt-svc:
|
||||
- h3=":443"; ma=86400
|
||||
cf-cache-status:
|
||||
- DYNAMIC
|
||||
strict-transport-security:
|
||||
- max-age=31536000; includeSubDomains; preload
|
||||
vary:
|
||||
- Origin
|
||||
x-request-id:
|
||||
- req_4d2830717fd2a87766200324d4a30907
|
||||
http_version: HTTP/1.1
|
||||
status_code: 401
|
||||
version: 1
|
||||
61
tests/test_agent_non_ascii.py
Normal file
61
tests/test_agent_non_ascii.py
Normal file
@@ -0,0 +1,61 @@
|
||||
import pytest
|
||||
|
||||
from crewai.utilities import sanitize_collection_name
|
||||
|
||||
|
||||
def test_sanitize_collection_name_with_non_ascii_chars():
    """Check that roles containing non-ASCII text yield valid collection names."""
    roles = (
        "一位有 20 年经验的 GraphQL 查询专家",
        "Café Owner & Barista (España) 🇪🇸",
    )

    for role in roles:
        result = sanitize_collection_name(role)

        # Length bounds required for a collection name.
        assert len(result) >= 3
        assert len(result) <= 63

        # First and last characters must be alphanumeric.
        first, last = result[0], result[-1]
        assert first.isalnum()
        assert last.isalnum()

        # Only alphanumerics, underscores and hyphens may appear.
        assert all(ch.isalnum() or ch in ("_", "-") for ch in result)

        # No consecutive underscores
        assert "__" not in result
|
||||
|
||||
|
||||
def test_sanitize_collection_name_edge_cases():
    """Test edge cases for sanitize_collection_name function."""
    # Empty input: nothing survives sanitizing, so the result must be padded.
    empty_role = ""
    sanitized_name = sanitize_collection_name(empty_role)
    assert len(sanitized_name) >= 3  # Should be padded to minimum length

    # Input made only of characters that are not allowed in a collection name.
    special_only = "!@#$%^&*()"
    sanitized_name = sanitize_collection_name(special_only)
    assert len(sanitized_name) >= 3
    assert sanitized_name[0].isalnum()
    assert sanitized_name[-1].isalnum()

    # Input longer than the maximum collection-name length.
    long_role = "a" * 100
    sanitized_name = sanitize_collection_name(long_role)
    assert len(sanitized_name) <= 63

    # A run of several spaces: each space becomes an underscore, so the
    # input must contain more than one space to exercise the collapsing step.
    consecutive_spaces = "Hello   World"
    sanitized_name = sanitize_collection_name(consecutive_spaces)
    assert "__" not in sanitized_name
|
||||
|
||||
|
||||
def test_sanitize_collection_name_reproduces_issue_2534():
    """Regression check for the role string reported in issue #2534."""
    result = sanitize_collection_name("一位有 20 年经验的 GraphQL 查询专家")
    size = len(result)

    # Length must fall inside the 3..63 window.
    assert size >= 3
    assert size <= 63

    # Both ends must be alphanumeric.
    assert result[0].isalnum()
    assert result[-1].isalnum()

    # Every character is alphanumeric, an underscore or a hyphen.
    for ch in result:
        assert ch.isalnum() or ch == "_" or ch == "-"

    # No consecutive underscores
    assert result.find("__") == -1
|
||||
15
uv.lock
generated
15
uv.lock
generated
@@ -1,5 +1,4 @@
|
||||
version = 1
|
||||
revision = 1
|
||||
requires-python = ">=3.10, <3.13"
|
||||
resolution-markers = [
|
||||
"python_full_version < '3.11' and sys_platform == 'darwin'",
|
||||
@@ -632,6 +631,7 @@ dependencies = [
|
||||
{ name = "regex" },
|
||||
{ name = "tomli" },
|
||||
{ name = "tomli-w" },
|
||||
{ name = "unidecode" },
|
||||
{ name = "uv" },
|
||||
]
|
||||
|
||||
@@ -720,9 +720,9 @@ requires-dist = [
|
||||
{ name = "tiktoken", marker = "extra == 'embeddings'", specifier = "~=0.7.0" },
|
||||
{ name = "tomli", specifier = ">=2.0.2" },
|
||||
{ name = "tomli-w", specifier = ">=1.1.0" },
|
||||
{ name = "unidecode", specifier = ">=1.3.8" },
|
||||
{ name = "uv", specifier = ">=0.4.25" },
|
||||
]
|
||||
provides-extras = ["tools", "embeddings", "agentops", "fastembed", "pdfplumber", "pandas", "openpyxl", "mem0", "docling", "aisuite"]
|
||||
|
||||
[package.metadata.requires-dev]
|
||||
dev = [
|
||||
@@ -2975,6 +2975,7 @@ name = "nvidia-nccl-cu12"
|
||||
version = "2.20.5"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/c1/bb/d09dda47c881f9ff504afd6f9ca4f502ded6d8fc2f572cacc5e39da91c28/nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1fc150d5c3250b170b29410ba682384b14581db722b2531b0d8d33c595f33d01", size = 176238458 },
|
||||
{ url = "https://files.pythonhosted.org/packages/4b/2a/0a131f572aa09f741c30ccd45a8e56316e8be8dfc7bc19bf0ab7cfef7b19/nvidia_nccl_cu12-2.20.5-py3-none-manylinux2014_x86_64.whl", hash = "sha256:057f6bf9685f75215d0c53bf3ac4a10b3e6578351de307abad9e18a99182af56", size = 176249402 },
|
||||
]
|
||||
|
||||
@@ -2984,6 +2985,7 @@ version = "12.6.85"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/9d/d7/c5383e47c7e9bf1c99d5bd2a8c935af2b6d705ad831a7ec5c97db4d82f4f/nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2010_x86_64.manylinux_2_12_x86_64.whl", hash = "sha256:eedc36df9e88b682efe4309aa16b5b4e78c2407eac59e8c10a6a47535164369a", size = 19744971 },
|
||||
{ url = "https://files.pythonhosted.org/packages/31/db/dc71113d441f208cdfe7ae10d4983884e13f464a6252450693365e166dcf/nvidia_nvjitlink_cu12-12.6.85-py3-none-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cf4eaa7d4b6b543ffd69d6abfb11efdeb2db48270d94dfd3a452c24150829e41", size = 19270338 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
@@ -5175,6 +5177,15 @@ wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/a6/ab/7e5f53c3b9d14972843a647d8d7a853969a58aecc7559cb3267302c94774/tzdata-2024.2-py2.py3-none-any.whl", hash = "sha256:a48093786cdcde33cad18c2555e8532f34422074448fbc874186f0abd79565cd", size = 346586 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unidecode"
|
||||
version = "1.3.8"
|
||||
source = { registry = "https://pypi.org/simple" }
|
||||
sdist = { url = "https://files.pythonhosted.org/packages/f7/89/19151076a006b9ac0dd37b1354e031f5297891ee507eb624755e58e10d3e/Unidecode-1.3.8.tar.gz", hash = "sha256:cfdb349d46ed3873ece4586b96aa75258726e2fa8ec21d6f00a591d98806c2f4", size = 192701 }
|
||||
wheels = [
|
||||
{ url = "https://files.pythonhosted.org/packages/84/b7/6ec57841fb67c98f52fc8e4a2d96df60059637cba077edc569a302a8ffc7/Unidecode-1.3.8-py3-none-any.whl", hash = "sha256:d130a61ce6696f8148a3bd8fe779c99adeb4b870584eeb9526584e9aa091fd39", size = 235494 },
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "urllib3"
|
||||
version = "2.2.3"
|
||||
|
||||
Reference in New Issue
Block a user