fix: hit-testing response failed because of Pydantic check. (#35640)
Some checks failed
autofix.ci / autofix (push) Has been cancelled
Build and Push API & Web / build (api, {{defaultContext}}:api, Dockerfile, DIFY_API_IMAGE_NAME, linux/amd64, depot-ubuntu-24.04-4, build-api-amd64) (push) Has been cancelled
Build and Push API & Web / build (api, {{defaultContext}}:api, Dockerfile, DIFY_API_IMAGE_NAME, linux/arm64, depot-ubuntu-24.04-4, build-api-arm64) (push) Has been cancelled
Build and Push API & Web / build (web, {{defaultContext}}, web/Dockerfile, DIFY_WEB_IMAGE_NAME, linux/amd64, depot-ubuntu-24.04-4, build-web-amd64) (push) Has been cancelled
Build and Push API & Web / build (web, {{defaultContext}}, web/Dockerfile, DIFY_WEB_IMAGE_NAME, linux/arm64, depot-ubuntu-24.04-4, build-web-arm64) (push) Has been cancelled
Build and Push API & Web / fork-build-validate ({{defaultContext}}, web/Dockerfile, validate-web-amd64) (push) Has been cancelled
Build and Push API & Web / fork-build-validate ({{defaultContext}}:api, Dockerfile, validate-api-amd64) (push) Has been cancelled
Build and Push API & Web / create-manifest (api, DIFY_API_IMAGE_NAME, merge-api-images) (push) Has been cancelled
Build and Push API & Web / create-manifest (web, DIFY_WEB_IMAGE_NAME, merge-web-images) (push) Has been cancelled
Main CI Pipeline / Skip Duplicate Checks (push) Has been cancelled
Main CI Pipeline / Check Changed Files (push) Has been cancelled
Main CI Pipeline / Run API Tests (push) Has been cancelled
Main CI Pipeline / Skip API Tests (push) Has been cancelled
Main CI Pipeline / API Tests (push) Has been cancelled
Main CI Pipeline / Run Web Tests (push) Has been cancelled
Main CI Pipeline / Skip Web Tests (push) Has been cancelled
Main CI Pipeline / Web Tests (push) Has been cancelled
Main CI Pipeline / Run Web Full-Stack E2E (push) Has been cancelled
Main CI Pipeline / Skip Web Full-Stack E2E (push) Has been cancelled
Main CI Pipeline / Web Full-Stack E2E (push) Has been cancelled
Main CI Pipeline / Style Check (push) Has been cancelled
Main CI Pipeline / Run VDB Tests (push) Has been cancelled
Main CI Pipeline / Skip VDB Tests (push) Has been cancelled
Main CI Pipeline / VDB Tests (push) Has been cancelled
Main CI Pipeline / Run DB Migration Test (push) Has been cancelled
Main CI Pipeline / Skip DB Migration Test (push) Has been cancelled
Main CI Pipeline / DB Migration Test (push) Has been cancelled
Mark stale issues and pull requests / stale (push) Has been cancelled

Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
Co-authored-by: Copilot Autofix powered by AI <175728472+Copilot@users.noreply.github.com>
This commit is contained in:
FFXN
2026-04-28 16:37:13 +08:00
committed by GitHub
parent d2e1da269c
commit 38eb04dc98
3 changed files with 135 additions and 1 deletions

View File

@@ -38,6 +38,48 @@ class HitTestingPayload(BaseModel):
class DatasetsHitTestingBase:
@staticmethod
def _normalize_hit_testing_query(query: Any) -> str:
    """Extract the plain query text from a hit-testing response value.

    Two shapes are accepted: a plain string, which is returned unchanged,
    and a dict whose ``"content"`` entry is a string (the legacy shape).

    Raises:
        ValueError: If ``query`` matches neither accepted shape.
    """
    # Unwrap the dict shape first so a single type check covers both cases.
    candidate = query.get("content") if isinstance(query, dict) else query
    if not isinstance(candidate, str):
        raise ValueError("Invalid hit testing query response")
    return candidate
@staticmethod
def _normalize_hit_testing_records(records: Any) -> list[dict[str, Any]]:
    """Replace missing or ``None`` list fields in hit-testing records with ``[]``.

    Anything that is not a list yields an empty result, and entries that are
    not dicts are dropped. Each kept record is shallow-copied, so the input
    is never mutated. In the copy, ``child_chunks`` and ``files`` become
    ``[]`` when absent or ``None``; when ``segment`` is a dict it is copied
    too and its ``keywords`` gets the same treatment.
    """
    if not isinstance(records, list):
        return []

    cleaned: list[dict[str, Any]] = []
    for entry in (item for item in records if isinstance(item, dict)):
        record_copy = dict(entry)

        segment = record_copy.get("segment")
        if isinstance(segment, dict):
            # Copy before patching so the caller's nested dict stays untouched.
            segment_copy = dict(segment)
            if segment_copy.get("keywords") is None:
                segment_copy["keywords"] = []
            record_copy["segment"] = segment_copy

        for list_field in ("child_chunks", "files"):
            if record_copy.get(list_field) is None:
                record_copy[list_field] = []

        cleaned.append(record_copy)
    return cleaned
@staticmethod
def get_and_validate_dataset(dataset_id: str):
assert isinstance(current_user, Account)
@@ -75,7 +117,12 @@ class DatasetsHitTestingBase:
attachment_ids=args.get("attachment_ids"),
limit=10,
)
return {"query": response["query"], "records": marshal(response["records"], hit_testing_record_fields)}
return {
"query": DatasetsHitTestingBase._normalize_hit_testing_query(response.get("query")),
"records": DatasetsHitTestingBase._normalize_hit_testing_records(
marshal(response.get("records", []), hit_testing_record_fields)
),
}
except services.errors.index.IndexNotInitializedError:
raise DatasetNotInitializedError()
except ProviderTokenNotInitError as ex:

View File

@@ -134,6 +134,42 @@ class TestPerformHitTesting:
assert result["query"] == "hello"
assert result["records"] == []
def test_success_normalizes_legacy_query_and_nullable_list_fields(self, dataset):
    """A dict-shaped query is unwrapped and ``None`` list fields come back as ``[]``."""
    marshalled_records = [
        {
            "segment": {"id": "segment-1", "keywords": None},
            "child_chunks": None,
            "files": None,
            "score": 0.8,
        }
    ]
    service_response = {
        "query": {"content": "hello"},
        "records": marshalled_records,
    }
    expected_records = [
        {
            "segment": {"id": "segment-1", "keywords": []},
            "child_chunks": [],
            "files": [],
            "score": 0.8,
        }
    ]

    # marshal is stubbed to hand back the raw records so that only the
    # normalization step is exercised.
    with (
        patch.object(HitTestingService, "retrieve", return_value=service_response),
        patch(
            "controllers.console.datasets.hit_testing_base.marshal",
            return_value=marshalled_records,
        ),
    ):
        result = DatasetsHitTestingBase.perform_hit_testing(dataset, {"query": "hello"})

    assert result["query"] == "hello"
    assert result["records"] == expected_records
def test_index_not_initialized(self, dataset):
with patch.object(
HitTestingService,

View File

@@ -171,6 +171,57 @@ class TestHitTestingApiPost:
assert passed_retrieval_model["search_method"] == "semantic_search"
assert passed_retrieval_model["top_k"] == 10
@patch("controllers.service_api.dataset.hit_testing.service_api_ns")
@patch("controllers.console.datasets.hit_testing_base.marshal")
@patch("controllers.console.datasets.hit_testing_base.HitTestingService")
@patch("controllers.console.datasets.hit_testing_base.DatasetService")
@patch("controllers.console.datasets.hit_testing_base.current_user", new_callable=lambda: Mock(spec=Account))
def test_post_normalizes_legacy_query_and_nullable_list_fields(
    self,
    mock_current_user,
    mock_dataset_svc,
    mock_hit_svc,
    mock_marshal,
    mock_ns,
    app,
):
    """The service API POST unwraps a dict-shaped query and turns ``None`` list fields into ``[]``."""
    dataset_id, tenant_id = str(uuid.uuid4()), str(uuid.uuid4())
    expected_records = [
        {
            "segment": {"id": "segment-1", "keywords": []},
            "child_chunks": [],
            "files": [],
            "score": 0.9,
        }
    ]

    # Dataset lookup and permission check both succeed.
    mock_dataset_svc.get_dataset.return_value = Mock(id=dataset_id)
    mock_dataset_svc.check_dataset_permission.return_value = None

    # The retrieval result carries the dict-shaped query; its records are a
    # placeholder because the stubbed marshal supplies the nullable payload.
    mock_hit_svc.hit_testing_args_check.return_value = None
    mock_hit_svc.retrieve.return_value = {
        "query": {"content": "legacy query"},
        "records": ["placeholder"],
    }
    mock_marshal.return_value = [
        {
            "segment": {"id": "segment-1", "keywords": None},
            "child_chunks": None,
            "files": None,
            "score": 0.9,
        }
    ]
    mock_ns.payload = {"query": "legacy query"}

    with app.test_request_context():
        endpoint = HitTestingApi()
        # __wrapped__ calls the handler without its outermost decorator.
        response = HitTestingApi.post.__wrapped__(endpoint, tenant_id, dataset_id)

    assert response["query"] == "legacy query"
    assert response["records"] == expected_records
@patch("controllers.service_api.dataset.hit_testing.service_api_ns")
@patch("controllers.console.datasets.hit_testing_base.DatasetService")
@patch("controllers.console.datasets.hit_testing_base.current_user", new_callable=lambda: Mock(spec=Account))