chore: port one api (#35609)

This commit is contained in:
Asuka Minato
2026-04-28 17:22:01 +09:00
committed by GitHub
parent 1d3498f659
commit d2e1da269c
2 changed files with 169 additions and 99 deletions

View File

@@ -468,15 +468,98 @@ class DocumentAddByFileApi(DatasetApiResource):
return documents_and_batch_fields, 200
def _update_document_by_file(tenant_id: str, dataset_id: UUID, document_id: UUID) -> tuple[Mapping[str, object], int]:
    """Update a document from an uploaded file for canonical and deprecated routes.

    Shared implementation behind the canonical ``PATCH /datasets/{id}/documents/{id}``
    handler and the deprecated ``update_by_file`` / ``update-by-file`` POST aliases.

    Args:
        tenant_id: Tenant owning the dataset.
        dataset_id: Dataset containing the document.
        document_id: Document to update in place.

    Returns:
        ``(payload, 200)`` where the payload holds the marshalled document and
        its indexing ``batch`` id.

    Raises:
        ValueError: Dataset missing, dataset is external, or no ``current_user``.
        TooManyFilesError: More than one file in the multipart request.
        FilenameNotExistsError: Uploaded file has no filename.
        FileTooLargeError: Re-raised from the file service as an API error.
        UnsupportedFileTypeError: Re-raised from the file service as an API error.
        ProviderNotInitializeError: Model provider token not initialised.
    """
    # Route converters hand us UUID objects; the ORM columns compare as strings.
    dataset_id_str = str(dataset_id)
    tenant_id_str = str(tenant_id)
    dataset = db.session.scalar(
        select(Dataset).where(Dataset.tenant_id == tenant_id_str, Dataset.id == dataset_id_str).limit(1)
    )
    if not dataset:
        raise ValueError("Dataset does not exist.")
    if dataset.provider == "external":
        raise ValueError("External datasets are not supported.")
    # Optional JSON payload rides in the multipart form under the "data" key.
    args: dict[str, object] = {}
    if "data" in request.form:
        args = json.loads(request.form["data"])
    if "doc_form" not in args:
        args["doc_form"] = dataset.chunk_structure or "text_model"
    if "doc_language" not in args:
        args["doc_language"] = "English"
    # indexing_technique is already set in dataset since this is an update
    args["indexing_technique"] = dataset.indexing_technique
    if "file" in request.files:
        # save file info
        file = request.files["file"]
        if len(request.files) > 1:
            raise TooManyFilesError()
        if not file.filename:
            raise FilenameNotExistsError
        if not current_user:
            raise ValueError("current_user is required")
        try:
            upload_file = FileService(db.engine).upload_file(
                filename=file.filename,
                content=file.read(),
                mimetype=file.mimetype,
                user=current_user,
                source="datasets",
            )
        except services.errors.file.FileTooLargeError as file_too_large_error:
            # Translate service-layer errors into API-level error types.
            raise FileTooLargeError(file_too_large_error.description)
        except services.errors.file.UnsupportedFileTypeError:
            raise UnsupportedFileTypeError()
        data_source = {
            "type": "upload_file",
            "info_list": {"data_source_type": "upload_file", "file_info_list": {"file_ids": [upload_file.id]}},
        }
        args["data_source"] = data_source
    # validate args
    # original_document_id presumably switches the service into update (rather
    # than create) mode — confirm against DocumentService.
    args["original_document_id"] = str(document_id)
    knowledge_config = KnowledgeConfig.model_validate(args)
    DocumentService.document_create_args_validate(knowledge_config)
    try:
        documents, _ = DocumentService.save_document_with_dataset_id(
            dataset=dataset,
            knowledge_config=knowledge_config,
            account=dataset.created_by_account,
            # An explicit process_rule in the payload wins; only fall back to
            # the dataset's latest rule when none was provided.
            dataset_process_rule=dataset.latest_process_rule if "process_rule" not in args else None,
            created_from="api",
        )
    except ProviderTokenNotInitError as ex:
        raise ProviderNotInitializeError(ex.description)
    document = documents[0]
    documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": document.batch}
    return documents_and_batch_fields, 200
@service_api_ns.route(
"/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_file",
"/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update-by-file",
)
class DocumentUpdateByFileApi(DatasetApiResource):
"""Resource for update documents."""
class DeprecatedDocumentUpdateByFileApi(DatasetApiResource):
"""Deprecated resource aliases for file document updates."""
@service_api_ns.doc("update_document_by_file")
@service_api_ns.doc(description="Update an existing document by uploading a file")
@service_api_ns.doc("update_document_by_file_deprecated")
@service_api_ns.doc(deprecated=True)
@service_api_ns.doc(
description=(
"Deprecated legacy alias for updating an existing document by uploading a file. "
"Use PATCH /datasets/{dataset_id}/documents/{document_id} instead."
)
)
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
@service_api_ns.doc(
responses={
@@ -487,82 +570,9 @@ class DocumentUpdateByFileApi(DatasetApiResource):
)
@cloud_edition_billing_resource_check("vector_space", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def post(self, tenant_id, dataset_id, document_id):
    """Update document by upload file.

    Legacy inline handler for the ``update_by_file`` route; performs the same
    pipeline as the shared ``_update_document_by_file`` helper.
    """
    # NOTE(review): this query filters on the raw route values before the
    # str() normalisation below — presumably SQLAlchemy coerces the UUIDs,
    # but worth confirming against the column types.
    dataset = db.session.scalar(
        select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).limit(1)
    )
    if not dataset:
        raise ValueError("Dataset does not exist.")
    if dataset.provider == "external":
        raise ValueError("External datasets are not supported.")
    # Optional JSON payload rides in the multipart form under the "data" key.
    args = {}
    if "data" in request.form:
        args = json.loads(request.form["data"])
    if "doc_form" not in args:
        args["doc_form"] = dataset.chunk_structure or "text_model"
    if "doc_language" not in args:
        args["doc_language"] = "English"
    # get dataset info
    dataset_id = str(dataset_id)
    tenant_id = str(tenant_id)
    # indexing_technique is already set in dataset since this is an update
    args["indexing_technique"] = dataset.indexing_technique
    if "file" in request.files:
        # save file info
        file = request.files["file"]
        if len(request.files) > 1:
            raise TooManyFilesError()
        if not file.filename:
            raise FilenameNotExistsError
        if not current_user:
            raise ValueError("current_user is required")
        try:
            upload_file = FileService(db.engine).upload_file(
                filename=file.filename,
                content=file.read(),
                mimetype=file.mimetype,
                user=current_user,
                source="datasets",
            )
        except services.errors.file.FileTooLargeError as file_too_large_error:
            # Translate service-layer errors into API-level error types.
            raise FileTooLargeError(file_too_large_error.description)
        except services.errors.file.UnsupportedFileTypeError:
            raise UnsupportedFileTypeError()
        data_source = {
            "type": "upload_file",
            "info_list": {"data_source_type": "upload_file", "file_info_list": {"file_ids": [upload_file.id]}},
        }
        args["data_source"] = data_source
    # validate args
    # original_document_id presumably switches the service into update (rather
    # than create) mode — confirm against DocumentService.
    args["original_document_id"] = str(document_id)
    knowledge_config = KnowledgeConfig.model_validate(args)
    DocumentService.document_create_args_validate(knowledge_config)
    try:
        documents, _ = DocumentService.save_document_with_dataset_id(
            dataset=dataset,
            knowledge_config=knowledge_config,
            account=dataset.created_by_account,
            # An explicit process_rule in the payload wins; only fall back to
            # the dataset's latest rule when none was provided.
            dataset_process_rule=dataset.latest_process_rule if "process_rule" not in args else None,
            created_from="api",
        )
    except ProviderTokenNotInitError as ex:
        raise ProviderNotInitializeError(ex.description)
    document = documents[0]
    documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": document.batch}
    return documents_and_batch_fields, 200
def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID):
    """Serve the deprecated file-update aliases by delegating to the shared handler."""
    # Forward by keyword: callers (and the unit tests) rely on the exact
    # keyword-argument call into the shared handler.
    return _update_document_by_file(
        tenant_id=tenant_id,
        dataset_id=dataset_id,
        document_id=document_id,
    )
@service_api_ns.route("/datasets/<uuid:dataset_id>/documents")
@@ -876,6 +886,22 @@ class DocumentApi(DatasetApiResource):
return response
@service_api_ns.doc("update_document_by_file")
@service_api_ns.doc(description="Update an existing document by uploading a file")
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
@service_api_ns.doc(
    responses={
        200: "Document updated successfully",
        401: "Unauthorized - invalid API token",
        404: "Document not found",
    }
)
@cloud_edition_billing_resource_check("vector_space", "dataset")
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
def patch(self, tenant_id: str, dataset_id: UUID, document_id: UUID):
    """Update document by file on the canonical document resource.

    Canonical replacement for the deprecated ``update_by_file`` POST aliases;
    all logic lives in the shared ``_update_document_by_file`` helper.
    """
    return _update_document_by_file(tenant_id=tenant_id, dataset_id=dataset_id, document_id=document_id)
@service_api_ns.doc("delete_document")
@service_api_ns.doc(description="Delete a document")
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})

View File

@@ -23,6 +23,7 @@ from werkzeug.exceptions import Forbidden, NotFound
from controllers.service_api.dataset.document import (
DeprecatedDocumentAddByTextApi,
DeprecatedDocumentUpdateByFileApi,
DeprecatedDocumentUpdateByTextApi,
DocumentAddByFileApi,
DocumentAddByTextApi,
@@ -32,7 +33,6 @@ from controllers.service_api.dataset.document import (
DocumentListQuery,
DocumentTextCreatePayload,
DocumentTextUpdate,
DocumentUpdateByFileApi,
DocumentUpdateByTextApi,
InvalidMetadataError,
)
@@ -1095,8 +1095,8 @@ class TestArchivedDocumentImmutableError:
assert error.code == 403
class TestDocumentTextRouteDeprecation:
"""Test that legacy underscore text routes stay marked deprecated."""
class TestDocumentRouteDeprecation:
"""Test that legacy document routes stay marked deprecated."""
def test_create_by_text_legacy_alias_is_deprecated(self):
"""Ensure only the legacy create-by-text alias is marked deprecated."""
@@ -1108,10 +1108,15 @@ class TestDocumentTextRouteDeprecation:
assert DeprecatedDocumentUpdateByTextApi.post.__apidoc__["deprecated"] is True
assert DocumentUpdateByTextApi.post.__apidoc__.get("deprecated") is not True
def test_update_by_file_legacy_aliases_are_deprecated(self):
"""Ensure only the legacy file-update aliases are marked deprecated."""
assert DeprecatedDocumentUpdateByFileApi.post.__apidoc__["deprecated"] is True
assert DocumentApi.patch.__apidoc__.get("deprecated") is not True
# =============================================================================
# Endpoint tests for DocumentUpdateByTextApi, DocumentAddByFileApi,
# and the canonical/deprecated document file update routes.
#
# These controllers use ``@cloud_edition_billing_resource_check`` (does NOT
# preserve ``__wrapped__``) and ``@cloud_edition_billing_rate_limit_check``
@@ -1359,13 +1364,52 @@ class TestDocumentAddByFileApiPost:
api.post(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id)
class TestDocumentUpdateByFileApiPost:
"""Test suite for DocumentUpdateByFileApi.post() endpoint.
class TestDocumentUpdateByFileApiPatch:
"""Test suite for the canonical document file update endpoint.
``post`` is wrapped by ``@cloud_edition_billing_resource_check`` and
``patch`` is wrapped by ``@cloud_edition_billing_resource_check`` and
``@cloud_edition_billing_rate_limit_check``.
"""
@pytest.mark.parametrize("route_name", ["update_by_file", "update-by-file"])
@patch("controllers.service_api.dataset.document._update_document_by_file")
@patch("controllers.service_api.wraps.FeatureService")
@patch("controllers.service_api.wraps.validate_and_get_api_token")
def test_update_by_file_deprecated_aliases_delegate_to_shared_handler(
    self,
    # @patch decorators inject mocks bottom-up: innermost patch is first.
    mock_validate_token,
    mock_feature_svc,
    mock_update_document_by_file,
    route_name,
    app,
    mock_tenant,
    mock_dataset,
):
    """Test legacy POST aliases still dispatch while marked deprecated."""
    _setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id)
    # The shared handler is mocked out, so this exercises only the
    # routing/delegation of the deprecated aliases.
    mock_update_document_by_file.return_value = ({"document": {"id": "doc-1"}, "batch": "batch-1"}, 200)
    doc_id = str(uuid.uuid4())
    with app.test_request_context(
        f"/datasets/{mock_dataset.id}/documents/{doc_id}/{route_name}",
        method="POST",
        headers={"Authorization": "Bearer test_token"},
    ):
        api = DeprecatedDocumentUpdateByFileApi()
        response, status = api.post(
            tenant_id=mock_tenant.id,
            dataset_id=mock_dataset.id,
            document_id=doc_id,
        )
    assert status == 200
    assert response["batch"] == "batch-1"
    # The alias must forward the exact keyword arguments to the shared handler.
    mock_update_document_by_file.assert_called_once_with(
        tenant_id=mock_tenant.id,
        dataset_id=mock_dataset.id,
        document_id=doc_id,
    )
@patch("controllers.service_api.dataset.document.db")
@patch("controllers.service_api.wraps.FeatureService")
@patch("controllers.service_api.wraps.validate_and_get_api_token")
@@ -1387,15 +1431,15 @@ class TestDocumentUpdateByFileApiPost:
doc_id = str(uuid.uuid4())
data = {"file": (BytesIO(b"content"), "test.pdf", "application/pdf")}
with app.test_request_context(
f"/datasets/{mock_dataset.id}/documents/{doc_id}/update_by_file",
method="POST",
f"/datasets/{mock_dataset.id}/documents/{doc_id}",
method="PATCH",
content_type="multipart/form-data",
data=data,
headers={"Authorization": "Bearer test_token"},
):
api = DocumentUpdateByFileApi()
api = DocumentApi()
with pytest.raises(ValueError, match="Dataset does not exist"):
api.post(
api.patch(
tenant_id=mock_tenant.id,
dataset_id=mock_dataset.id,
document_id=doc_id,
@@ -1423,15 +1467,15 @@ class TestDocumentUpdateByFileApiPost:
doc_id = str(uuid.uuid4())
data = {"file": (BytesIO(b"content"), "test.pdf", "application/pdf")}
with app.test_request_context(
f"/datasets/{mock_dataset.id}/documents/{doc_id}/update_by_file",
method="POST",
f"/datasets/{mock_dataset.id}/documents/{doc_id}",
method="PATCH",
content_type="multipart/form-data",
data=data,
headers={"Authorization": "Bearer test_token"},
):
api = DocumentUpdateByFileApi()
api = DocumentApi()
with pytest.raises(ValueError, match="External datasets"):
api.post(
api.patch(
tenant_id=mock_tenant.id,
dataset_id=mock_dataset.id,
document_id=doc_id,
@@ -1482,14 +1526,14 @@ class TestDocumentUpdateByFileApiPost:
doc_id = str(uuid.uuid4())
data = {"file": (BytesIO(b"file content"), "test.pdf", "application/pdf")}
with app.test_request_context(
f"/datasets/{mock_dataset.id}/documents/{doc_id}/update_by_file",
method="POST",
f"/datasets/{mock_dataset.id}/documents/{doc_id}",
method="PATCH",
content_type="multipart/form-data",
data=data,
headers={"Authorization": "Bearer test_token"},
):
api = DocumentUpdateByFileApi()
response, status = api.post(
api = DocumentApi()
response, status = api.patch(
tenant_id=mock_tenant.id,
dataset_id=mock_dataset.id,
document_id=doc_id,