mirror of
https://mirror.skon.top/github.com/langgenius/dify.git
synced 2026-05-01 11:40:12 +08:00
chore: port one api (#35609)
This commit is contained in:
@@ -468,15 +468,98 @@ class DocumentAddByFileApi(DatasetApiResource):
|
||||
return documents_and_batch_fields, 200
|
||||
|
||||
|
||||
def _update_document_by_file(tenant_id: str, dataset_id: UUID, document_id: UUID) -> tuple[Mapping[str, object], int]:
|
||||
"""Update a document from an uploaded file for canonical and deprecated routes."""
|
||||
dataset_id_str = str(dataset_id)
|
||||
tenant_id_str = str(tenant_id)
|
||||
dataset = db.session.scalar(
|
||||
select(Dataset).where(Dataset.tenant_id == tenant_id_str, Dataset.id == dataset_id_str).limit(1)
|
||||
)
|
||||
|
||||
if not dataset:
|
||||
raise ValueError("Dataset does not exist.")
|
||||
|
||||
if dataset.provider == "external":
|
||||
raise ValueError("External datasets are not supported.")
|
||||
|
||||
args: dict[str, object] = {}
|
||||
if "data" in request.form:
|
||||
args = json.loads(request.form["data"])
|
||||
if "doc_form" not in args:
|
||||
args["doc_form"] = dataset.chunk_structure or "text_model"
|
||||
if "doc_language" not in args:
|
||||
args["doc_language"] = "English"
|
||||
|
||||
# indexing_technique is already set in dataset since this is an update
|
||||
args["indexing_technique"] = dataset.indexing_technique
|
||||
|
||||
if "file" in request.files:
|
||||
# save file info
|
||||
file = request.files["file"]
|
||||
|
||||
if len(request.files) > 1:
|
||||
raise TooManyFilesError()
|
||||
|
||||
if not file.filename:
|
||||
raise FilenameNotExistsError
|
||||
|
||||
if not current_user:
|
||||
raise ValueError("current_user is required")
|
||||
|
||||
try:
|
||||
upload_file = FileService(db.engine).upload_file(
|
||||
filename=file.filename,
|
||||
content=file.read(),
|
||||
mimetype=file.mimetype,
|
||||
user=current_user,
|
||||
source="datasets",
|
||||
)
|
||||
except services.errors.file.FileTooLargeError as file_too_large_error:
|
||||
raise FileTooLargeError(file_too_large_error.description)
|
||||
except services.errors.file.UnsupportedFileTypeError:
|
||||
raise UnsupportedFileTypeError()
|
||||
data_source = {
|
||||
"type": "upload_file",
|
||||
"info_list": {"data_source_type": "upload_file", "file_info_list": {"file_ids": [upload_file.id]}},
|
||||
}
|
||||
args["data_source"] = data_source
|
||||
|
||||
# validate args
|
||||
args["original_document_id"] = str(document_id)
|
||||
|
||||
knowledge_config = KnowledgeConfig.model_validate(args)
|
||||
DocumentService.document_create_args_validate(knowledge_config)
|
||||
|
||||
try:
|
||||
documents, _ = DocumentService.save_document_with_dataset_id(
|
||||
dataset=dataset,
|
||||
knowledge_config=knowledge_config,
|
||||
account=dataset.created_by_account,
|
||||
dataset_process_rule=dataset.latest_process_rule if "process_rule" not in args else None,
|
||||
created_from="api",
|
||||
)
|
||||
except ProviderTokenNotInitError as ex:
|
||||
raise ProviderNotInitializeError(ex.description)
|
||||
document = documents[0]
|
||||
documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": document.batch}
|
||||
return documents_and_batch_fields, 200
|
||||
|
||||
|
||||
@service_api_ns.route(
|
||||
"/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update_by_file",
|
||||
"/datasets/<uuid:dataset_id>/documents/<uuid:document_id>/update-by-file",
|
||||
)
|
||||
class DocumentUpdateByFileApi(DatasetApiResource):
|
||||
"""Resource for update documents."""
|
||||
class DeprecatedDocumentUpdateByFileApi(DatasetApiResource):
|
||||
"""Deprecated resource aliases for file document updates."""
|
||||
|
||||
@service_api_ns.doc("update_document_by_file")
|
||||
@service_api_ns.doc(description="Update an existing document by uploading a file")
|
||||
@service_api_ns.doc("update_document_by_file_deprecated")
|
||||
@service_api_ns.doc(deprecated=True)
|
||||
@service_api_ns.doc(
|
||||
description=(
|
||||
"Deprecated legacy alias for updating an existing document by uploading a file. "
|
||||
"Use PATCH /datasets/{dataset_id}/documents/{document_id} instead."
|
||||
)
|
||||
)
|
||||
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
|
||||
@service_api_ns.doc(
|
||||
responses={
|
||||
@@ -487,82 +570,9 @@ class DocumentUpdateByFileApi(DatasetApiResource):
|
||||
)
|
||||
@cloud_edition_billing_resource_check("vector_space", "dataset")
|
||||
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
|
||||
def post(self, tenant_id, dataset_id, document_id):
|
||||
"""Update document by upload file."""
|
||||
dataset = db.session.scalar(
|
||||
select(Dataset).where(Dataset.tenant_id == tenant_id, Dataset.id == dataset_id).limit(1)
|
||||
)
|
||||
|
||||
if not dataset:
|
||||
raise ValueError("Dataset does not exist.")
|
||||
|
||||
if dataset.provider == "external":
|
||||
raise ValueError("External datasets are not supported.")
|
||||
|
||||
args = {}
|
||||
if "data" in request.form:
|
||||
args = json.loads(request.form["data"])
|
||||
if "doc_form" not in args:
|
||||
args["doc_form"] = dataset.chunk_structure or "text_model"
|
||||
if "doc_language" not in args:
|
||||
args["doc_language"] = "English"
|
||||
|
||||
# get dataset info
|
||||
dataset_id = str(dataset_id)
|
||||
tenant_id = str(tenant_id)
|
||||
|
||||
# indexing_technique is already set in dataset since this is an update
|
||||
args["indexing_technique"] = dataset.indexing_technique
|
||||
|
||||
if "file" in request.files:
|
||||
# save file info
|
||||
file = request.files["file"]
|
||||
|
||||
if len(request.files) > 1:
|
||||
raise TooManyFilesError()
|
||||
|
||||
if not file.filename:
|
||||
raise FilenameNotExistsError
|
||||
|
||||
if not current_user:
|
||||
raise ValueError("current_user is required")
|
||||
|
||||
try:
|
||||
upload_file = FileService(db.engine).upload_file(
|
||||
filename=file.filename,
|
||||
content=file.read(),
|
||||
mimetype=file.mimetype,
|
||||
user=current_user,
|
||||
source="datasets",
|
||||
)
|
||||
except services.errors.file.FileTooLargeError as file_too_large_error:
|
||||
raise FileTooLargeError(file_too_large_error.description)
|
||||
except services.errors.file.UnsupportedFileTypeError:
|
||||
raise UnsupportedFileTypeError()
|
||||
data_source = {
|
||||
"type": "upload_file",
|
||||
"info_list": {"data_source_type": "upload_file", "file_info_list": {"file_ids": [upload_file.id]}},
|
||||
}
|
||||
args["data_source"] = data_source
|
||||
# validate args
|
||||
args["original_document_id"] = str(document_id)
|
||||
|
||||
knowledge_config = KnowledgeConfig.model_validate(args)
|
||||
DocumentService.document_create_args_validate(knowledge_config)
|
||||
|
||||
try:
|
||||
documents, _ = DocumentService.save_document_with_dataset_id(
|
||||
dataset=dataset,
|
||||
knowledge_config=knowledge_config,
|
||||
account=dataset.created_by_account,
|
||||
dataset_process_rule=dataset.latest_process_rule if "process_rule" not in args else None,
|
||||
created_from="api",
|
||||
)
|
||||
except ProviderTokenNotInitError as ex:
|
||||
raise ProviderNotInitializeError(ex.description)
|
||||
document = documents[0]
|
||||
documents_and_batch_fields = {"document": marshal(document, document_fields), "batch": document.batch}
|
||||
return documents_and_batch_fields, 200
|
||||
def post(self, tenant_id: str, dataset_id: UUID, document_id: UUID):
|
||||
"""Update document by file through the deprecated file-update aliases."""
|
||||
return _update_document_by_file(tenant_id=tenant_id, dataset_id=dataset_id, document_id=document_id)
|
||||
|
||||
|
||||
@service_api_ns.route("/datasets/<uuid:dataset_id>/documents")
|
||||
@@ -876,6 +886,22 @@ class DocumentApi(DatasetApiResource):
|
||||
|
||||
return response
|
||||
|
||||
@service_api_ns.doc("update_document_by_file")
|
||||
@service_api_ns.doc(description="Update an existing document by uploading a file")
|
||||
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
|
||||
@service_api_ns.doc(
|
||||
responses={
|
||||
200: "Document updated successfully",
|
||||
401: "Unauthorized - invalid API token",
|
||||
404: "Document not found",
|
||||
}
|
||||
)
|
||||
@cloud_edition_billing_resource_check("vector_space", "dataset")
|
||||
@cloud_edition_billing_rate_limit_check("knowledge", "dataset")
|
||||
def patch(self, tenant_id: str, dataset_id: UUID, document_id: UUID):
|
||||
"""Update document by file on the canonical document resource."""
|
||||
return _update_document_by_file(tenant_id=tenant_id, dataset_id=dataset_id, document_id=document_id)
|
||||
|
||||
@service_api_ns.doc("delete_document")
|
||||
@service_api_ns.doc(description="Delete a document")
|
||||
@service_api_ns.doc(params={"dataset_id": "Dataset ID", "document_id": "Document ID"})
|
||||
|
||||
@@ -23,6 +23,7 @@ from werkzeug.exceptions import Forbidden, NotFound
|
||||
|
||||
from controllers.service_api.dataset.document import (
|
||||
DeprecatedDocumentAddByTextApi,
|
||||
DeprecatedDocumentUpdateByFileApi,
|
||||
DeprecatedDocumentUpdateByTextApi,
|
||||
DocumentAddByFileApi,
|
||||
DocumentAddByTextApi,
|
||||
@@ -32,7 +33,6 @@ from controllers.service_api.dataset.document import (
|
||||
DocumentListQuery,
|
||||
DocumentTextCreatePayload,
|
||||
DocumentTextUpdate,
|
||||
DocumentUpdateByFileApi,
|
||||
DocumentUpdateByTextApi,
|
||||
InvalidMetadataError,
|
||||
)
|
||||
@@ -1095,8 +1095,8 @@ class TestArchivedDocumentImmutableError:
|
||||
assert error.code == 403
|
||||
|
||||
|
||||
class TestDocumentTextRouteDeprecation:
|
||||
"""Test that legacy underscore text routes stay marked deprecated."""
|
||||
class TestDocumentRouteDeprecation:
|
||||
"""Test that legacy document routes stay marked deprecated."""
|
||||
|
||||
def test_create_by_text_legacy_alias_is_deprecated(self):
|
||||
"""Ensure only the legacy create-by-text alias is marked deprecated."""
|
||||
@@ -1108,10 +1108,15 @@ class TestDocumentTextRouteDeprecation:
|
||||
assert DeprecatedDocumentUpdateByTextApi.post.__apidoc__["deprecated"] is True
|
||||
assert DocumentUpdateByTextApi.post.__apidoc__.get("deprecated") is not True
|
||||
|
||||
def test_update_by_file_legacy_aliases_are_deprecated(self):
|
||||
"""Ensure only the legacy file-update aliases are marked deprecated."""
|
||||
assert DeprecatedDocumentUpdateByFileApi.post.__apidoc__["deprecated"] is True
|
||||
assert DocumentApi.patch.__apidoc__.get("deprecated") is not True
|
||||
|
||||
|
||||
# =============================================================================
|
||||
# Endpoint tests for DocumentUpdateByTextApi, DocumentAddByFileApi,
|
||||
# DocumentUpdateByFileApi.
|
||||
# and the canonical/deprecated document file update routes.
|
||||
#
|
||||
# These controllers use ``@cloud_edition_billing_resource_check`` (does NOT
|
||||
# preserve ``__wrapped__``) and ``@cloud_edition_billing_rate_limit_check``
|
||||
@@ -1359,13 +1364,52 @@ class TestDocumentAddByFileApiPost:
|
||||
api.post(tenant_id=mock_tenant.id, dataset_id=mock_dataset.id)
|
||||
|
||||
|
||||
class TestDocumentUpdateByFileApiPost:
|
||||
"""Test suite for DocumentUpdateByFileApi.post() endpoint.
|
||||
class TestDocumentUpdateByFileApiPatch:
|
||||
"""Test suite for the canonical document file update endpoint.
|
||||
|
||||
``post`` is wrapped by ``@cloud_edition_billing_resource_check`` and
|
||||
``patch`` is wrapped by ``@cloud_edition_billing_resource_check`` and
|
||||
``@cloud_edition_billing_rate_limit_check``.
|
||||
"""
|
||||
|
||||
@pytest.mark.parametrize("route_name", ["update_by_file", "update-by-file"])
|
||||
@patch("controllers.service_api.dataset.document._update_document_by_file")
|
||||
@patch("controllers.service_api.wraps.FeatureService")
|
||||
@patch("controllers.service_api.wraps.validate_and_get_api_token")
|
||||
def test_update_by_file_deprecated_aliases_delegate_to_shared_handler(
|
||||
self,
|
||||
mock_validate_token,
|
||||
mock_feature_svc,
|
||||
mock_update_document_by_file,
|
||||
route_name,
|
||||
app,
|
||||
mock_tenant,
|
||||
mock_dataset,
|
||||
):
|
||||
"""Test legacy POST aliases still dispatch while marked deprecated."""
|
||||
_setup_billing_mocks(mock_validate_token, mock_feature_svc, mock_tenant.id)
|
||||
mock_update_document_by_file.return_value = ({"document": {"id": "doc-1"}, "batch": "batch-1"}, 200)
|
||||
|
||||
doc_id = str(uuid.uuid4())
|
||||
with app.test_request_context(
|
||||
f"/datasets/{mock_dataset.id}/documents/{doc_id}/{route_name}",
|
||||
method="POST",
|
||||
headers={"Authorization": "Bearer test_token"},
|
||||
):
|
||||
api = DeprecatedDocumentUpdateByFileApi()
|
||||
response, status = api.post(
|
||||
tenant_id=mock_tenant.id,
|
||||
dataset_id=mock_dataset.id,
|
||||
document_id=doc_id,
|
||||
)
|
||||
|
||||
assert status == 200
|
||||
assert response["batch"] == "batch-1"
|
||||
mock_update_document_by_file.assert_called_once_with(
|
||||
tenant_id=mock_tenant.id,
|
||||
dataset_id=mock_dataset.id,
|
||||
document_id=doc_id,
|
||||
)
|
||||
|
||||
@patch("controllers.service_api.dataset.document.db")
|
||||
@patch("controllers.service_api.wraps.FeatureService")
|
||||
@patch("controllers.service_api.wraps.validate_and_get_api_token")
|
||||
@@ -1387,15 +1431,15 @@ class TestDocumentUpdateByFileApiPost:
|
||||
doc_id = str(uuid.uuid4())
|
||||
data = {"file": (BytesIO(b"content"), "test.pdf", "application/pdf")}
|
||||
with app.test_request_context(
|
||||
f"/datasets/{mock_dataset.id}/documents/{doc_id}/update_by_file",
|
||||
method="POST",
|
||||
f"/datasets/{mock_dataset.id}/documents/{doc_id}",
|
||||
method="PATCH",
|
||||
content_type="multipart/form-data",
|
||||
data=data,
|
||||
headers={"Authorization": "Bearer test_token"},
|
||||
):
|
||||
api = DocumentUpdateByFileApi()
|
||||
api = DocumentApi()
|
||||
with pytest.raises(ValueError, match="Dataset does not exist"):
|
||||
api.post(
|
||||
api.patch(
|
||||
tenant_id=mock_tenant.id,
|
||||
dataset_id=mock_dataset.id,
|
||||
document_id=doc_id,
|
||||
@@ -1423,15 +1467,15 @@ class TestDocumentUpdateByFileApiPost:
|
||||
doc_id = str(uuid.uuid4())
|
||||
data = {"file": (BytesIO(b"content"), "test.pdf", "application/pdf")}
|
||||
with app.test_request_context(
|
||||
f"/datasets/{mock_dataset.id}/documents/{doc_id}/update_by_file",
|
||||
method="POST",
|
||||
f"/datasets/{mock_dataset.id}/documents/{doc_id}",
|
||||
method="PATCH",
|
||||
content_type="multipart/form-data",
|
||||
data=data,
|
||||
headers={"Authorization": "Bearer test_token"},
|
||||
):
|
||||
api = DocumentUpdateByFileApi()
|
||||
api = DocumentApi()
|
||||
with pytest.raises(ValueError, match="External datasets"):
|
||||
api.post(
|
||||
api.patch(
|
||||
tenant_id=mock_tenant.id,
|
||||
dataset_id=mock_dataset.id,
|
||||
document_id=doc_id,
|
||||
@@ -1482,14 +1526,14 @@ class TestDocumentUpdateByFileApiPost:
|
||||
doc_id = str(uuid.uuid4())
|
||||
data = {"file": (BytesIO(b"file content"), "test.pdf", "application/pdf")}
|
||||
with app.test_request_context(
|
||||
f"/datasets/{mock_dataset.id}/documents/{doc_id}/update_by_file",
|
||||
method="POST",
|
||||
f"/datasets/{mock_dataset.id}/documents/{doc_id}",
|
||||
method="PATCH",
|
||||
content_type="multipart/form-data",
|
||||
data=data,
|
||||
headers={"Authorization": "Bearer test_token"},
|
||||
):
|
||||
api = DocumentUpdateByFileApi()
|
||||
response, status = api.post(
|
||||
api = DocumentApi()
|
||||
response, status = api.patch(
|
||||
tenant_id=mock_tenant.id,
|
||||
dataset_id=mock_dataset.id,
|
||||
document_id=doc_id,
|
||||
|
||||
Reference in New Issue
Block a user