mirror of
https://mirror.skon.top/github.com/langgenius/dify.git
synced 2026-04-20 15:20:15 +08:00
test: migrate clean notion task tests to SQLAlchemy 2.0 APIs (#35159)
This commit is contained in:
@@ -11,7 +11,8 @@ from unittest.mock import Mock, patch
|
||||
|
||||
import pytest
|
||||
from faker import Faker
|
||||
from sqlalchemy import func, select
|
||||
from sqlalchemy import ColumnElement, func, select
|
||||
from sqlalchemy.orm import Session
|
||||
|
||||
from core.rag.index_processor.constant.index_type import IndexStructureType
|
||||
from models.dataset import Dataset, Document, DocumentSegment
|
||||
@@ -21,6 +22,14 @@ from tasks.clean_notion_document_task import clean_notion_document_task
|
||||
from tests.test_containers_integration_tests.helpers import generate_valid_password
|
||||
|
||||
|
||||
def _count_documents(session: Session, condition: ColumnElement[bool]) -> int:
    """Return the number of ``Document`` rows that satisfy ``condition``.

    Args:
        session: Session used to execute the count query.
        condition: Boolean SQL expression applied as the ``WHERE`` clause.

    Returns:
        The scalar count, or ``0`` when the query yields a falsy result.
    """
    count_stmt = select(func.count()).select_from(Document).where(condition)
    total = session.scalar(count_stmt)
    return total or 0
|
||||
|
||||
|
||||
def _count_segments(session: Session, condition: ColumnElement[bool]) -> int:
    """Return the number of ``DocumentSegment`` rows that satisfy ``condition``.

    Args:
        session: Session used to execute the count query.
        condition: Boolean SQL expression applied as the ``WHERE`` clause.

    Returns:
        The scalar count, or ``0`` when the query yields a falsy result.
    """
    count_stmt = select(func.count()).select_from(DocumentSegment).where(condition)
    total = session.scalar(count_stmt)
    return total or 0
|
||||
|
||||
|
||||
class TestCleanNotionDocumentTask:
|
||||
"""Integration tests for clean_notion_document_task using testcontainers."""
|
||||
|
||||
@@ -146,29 +155,14 @@ class TestCleanNotionDocumentTask:
|
||||
db_session_with_containers.commit()
|
||||
|
||||
# Verify data exists before cleanup
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(Document).where(Document.id.in_(document_ids))
|
||||
)
|
||||
== 3
|
||||
)
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(DocumentSegment).where(DocumentSegment.document_id.in_(document_ids))
|
||||
)
|
||||
== 6
|
||||
)
|
||||
assert _count_documents(db_session_with_containers, Document.id.in_(document_ids)) == 3
|
||||
assert _count_segments(db_session_with_containers, DocumentSegment.document_id.in_(document_ids)) == 6
|
||||
|
||||
# Execute cleanup task
|
||||
clean_notion_document_task(document_ids, dataset.id)
|
||||
|
||||
# Verify segments are deleted
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(DocumentSegment).where(DocumentSegment.document_id.in_(document_ids))
|
||||
)
|
||||
== 0
|
||||
)
|
||||
assert _count_segments(db_session_with_containers, DocumentSegment.document_id.in_(document_ids)) == 0
|
||||
|
||||
# Verify index processor was called
|
||||
mock_processor = mock_index_processor_factory.return_value.init_index_processor.return_value
|
||||
@@ -328,12 +322,7 @@ class TestCleanNotionDocumentTask:
|
||||
# The task properly handles various index types and document configurations.
|
||||
|
||||
# Verify segments are deleted
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(DocumentSegment).where(DocumentSegment.document_id == document.id)
|
||||
)
|
||||
== 0
|
||||
)
|
||||
assert _count_segments(db_session_with_containers, DocumentSegment.document_id == document.id) == 0
|
||||
|
||||
# Reset mock for next iteration
|
||||
mock_index_processor_factory.reset_mock()
|
||||
@@ -416,12 +405,7 @@ class TestCleanNotionDocumentTask:
|
||||
clean_notion_document_task([document.id], dataset.id)
|
||||
|
||||
# Verify segments are deleted
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(DocumentSegment).where(DocumentSegment.document_id == document.id)
|
||||
)
|
||||
== 0
|
||||
)
|
||||
assert _count_segments(db_session_with_containers, DocumentSegment.document_id == document.id) == 0
|
||||
|
||||
# Note: This test successfully verifies that segments without index_node_ids
|
||||
# are properly deleted from the database.
|
||||
@@ -507,18 +491,8 @@ class TestCleanNotionDocumentTask:
|
||||
db_session_with_containers.commit()
|
||||
|
||||
# Verify all data exists before cleanup
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(Document).where(Document.dataset_id == dataset.id)
|
||||
)
|
||||
== 5
|
||||
)
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(DocumentSegment).where(DocumentSegment.dataset_id == dataset.id)
|
||||
)
|
||||
== 10
|
||||
)
|
||||
assert _count_documents(db_session_with_containers, Document.dataset_id == dataset.id) == 5
|
||||
assert _count_segments(db_session_with_containers, DocumentSegment.dataset_id == dataset.id) == 10
|
||||
|
||||
# Clean up only first 3 documents
|
||||
documents_to_clean = [doc.id for doc in documents[:3]]
|
||||
@@ -528,29 +502,12 @@ class TestCleanNotionDocumentTask:
|
||||
clean_notion_document_task(documents_to_clean, dataset.id)
|
||||
|
||||
# Verify only specified documents' segments are deleted
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count())
|
||||
.select_from(DocumentSegment)
|
||||
.where(DocumentSegment.document_id.in_(documents_to_clean))
|
||||
)
|
||||
== 0
|
||||
)
|
||||
assert _count_segments(db_session_with_containers, DocumentSegment.document_id.in_(documents_to_clean)) == 0
|
||||
|
||||
# Verify remaining documents and segments are intact
|
||||
remaining_docs = [doc.id for doc in documents[3:]]
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(Document).where(Document.id.in_(remaining_docs))
|
||||
)
|
||||
== 2
|
||||
)
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(DocumentSegment).where(DocumentSegment.document_id.in_(remaining_docs))
|
||||
)
|
||||
== 4
|
||||
)
|
||||
assert _count_documents(db_session_with_containers, Document.id.in_(remaining_docs)) == 2
|
||||
assert _count_segments(db_session_with_containers, DocumentSegment.document_id.in_(remaining_docs)) == 4
|
||||
|
||||
# Note: This test successfully verifies partial document cleanup operations.
|
||||
# The database operations work correctly, isolating only the specified documents.
|
||||
@@ -634,23 +591,13 @@ class TestCleanNotionDocumentTask:
|
||||
db_session_with_containers.commit()
|
||||
|
||||
# Verify all segments exist before cleanup
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(DocumentSegment).where(DocumentSegment.document_id == document.id)
|
||||
)
|
||||
== 4
|
||||
)
|
||||
assert _count_segments(db_session_with_containers, DocumentSegment.document_id == document.id) == 4
|
||||
|
||||
# Execute cleanup task
|
||||
clean_notion_document_task([document.id], dataset.id)
|
||||
|
||||
# Verify all segments are deleted regardless of status
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(DocumentSegment).where(DocumentSegment.document_id == document.id)
|
||||
)
|
||||
== 0
|
||||
)
|
||||
assert _count_segments(db_session_with_containers, DocumentSegment.document_id == document.id) == 0
|
||||
|
||||
# Note: This test successfully verifies database operations.
|
||||
# IndexProcessor verification would require more sophisticated mocking.
|
||||
@@ -820,16 +767,9 @@ class TestCleanNotionDocumentTask:
|
||||
db_session_with_containers.commit()
|
||||
|
||||
# Verify all data exists before cleanup
|
||||
assert _count_documents(db_session_with_containers, Document.dataset_id == dataset.id) == num_documents
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(Document).where(Document.dataset_id == dataset.id)
|
||||
)
|
||||
== num_documents
|
||||
)
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(DocumentSegment).where(DocumentSegment.dataset_id == dataset.id)
|
||||
)
|
||||
_count_segments(db_session_with_containers, DocumentSegment.dataset_id == dataset.id)
|
||||
== num_documents * num_segments_per_doc
|
||||
)
|
||||
|
||||
@@ -838,12 +778,7 @@ class TestCleanNotionDocumentTask:
|
||||
clean_notion_document_task(all_document_ids, dataset.id)
|
||||
|
||||
# Verify all segments are deleted
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(DocumentSegment).where(DocumentSegment.dataset_id == dataset.id)
|
||||
)
|
||||
== 0
|
||||
)
|
||||
assert _count_segments(db_session_with_containers, DocumentSegment.dataset_id == dataset.id) == 0
|
||||
|
||||
# Note: This test successfully verifies bulk document cleanup operations.
|
||||
# The database efficiently handles large-scale deletions.
|
||||
@@ -950,29 +885,12 @@ class TestCleanNotionDocumentTask:
|
||||
clean_notion_document_task([target_document.id], target_dataset.id)
|
||||
|
||||
# Verify only documents' segments from target dataset are deleted
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count())
|
||||
.select_from(DocumentSegment)
|
||||
.where(DocumentSegment.document_id == target_document.id)
|
||||
)
|
||||
== 0
|
||||
)
|
||||
assert _count_segments(db_session_with_containers, DocumentSegment.document_id == target_document.id) == 0
|
||||
|
||||
# Verify documents from other datasets remain intact
|
||||
remaining_docs = [doc.id for doc in all_documents[1:]]
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(Document).where(Document.id.in_(remaining_docs))
|
||||
)
|
||||
== 2
|
||||
)
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(DocumentSegment).where(DocumentSegment.document_id.in_(remaining_docs))
|
||||
)
|
||||
== 6
|
||||
)
|
||||
assert _count_documents(db_session_with_containers, Document.id.in_(remaining_docs)) == 2
|
||||
assert _count_segments(db_session_with_containers, DocumentSegment.document_id.in_(remaining_docs)) == 6
|
||||
|
||||
# Note: This test successfully verifies multi-tenant isolation.
|
||||
# Only documents from the target dataset are affected, maintaining tenant separation.
|
||||
@@ -1067,13 +985,9 @@ class TestCleanNotionDocumentTask:
|
||||
db_session_with_containers.commit()
|
||||
|
||||
# Verify all data exists before cleanup
|
||||
assert db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(Document).where(Document.dataset_id == dataset.id)
|
||||
) == len(document_statuses)
|
||||
assert _count_documents(db_session_with_containers, Document.dataset_id == dataset.id) == len(document_statuses)
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(DocumentSegment).where(DocumentSegment.dataset_id == dataset.id)
|
||||
)
|
||||
_count_segments(db_session_with_containers, DocumentSegment.dataset_id == dataset.id)
|
||||
== len(document_statuses) * 2
|
||||
)
|
||||
|
||||
@@ -1082,12 +996,7 @@ class TestCleanNotionDocumentTask:
|
||||
clean_notion_document_task(all_document_ids, dataset.id)
|
||||
|
||||
# Verify all segments are deleted regardless of status
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(DocumentSegment).where(DocumentSegment.dataset_id == dataset.id)
|
||||
)
|
||||
== 0
|
||||
)
|
||||
assert _count_segments(db_session_with_containers, DocumentSegment.dataset_id == dataset.id) == 0
|
||||
|
||||
# Note: This test successfully verifies cleanup of documents in various states.
|
||||
# All documents are deleted regardless of their indexing status.
|
||||
@@ -1185,29 +1094,14 @@ class TestCleanNotionDocumentTask:
|
||||
db_session_with_containers.commit()
|
||||
|
||||
# Verify data exists before cleanup
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(Document).where(Document.id == document.id)
|
||||
)
|
||||
== 1
|
||||
)
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(DocumentSegment).where(DocumentSegment.document_id == document.id)
|
||||
)
|
||||
== 3
|
||||
)
|
||||
assert _count_documents(db_session_with_containers, Document.id == document.id) == 1
|
||||
assert _count_segments(db_session_with_containers, DocumentSegment.document_id == document.id) == 3
|
||||
|
||||
# Execute cleanup task
|
||||
clean_notion_document_task([document.id], dataset.id)
|
||||
|
||||
# Verify segments are deleted
|
||||
assert (
|
||||
db_session_with_containers.scalar(
|
||||
select(func.count()).select_from(DocumentSegment).where(DocumentSegment.document_id == document.id)
|
||||
)
|
||||
== 0
|
||||
)
|
||||
assert _count_segments(db_session_with_containers, DocumentSegment.document_id == document.id) == 0
|
||||
|
||||
# Note: This test successfully verifies cleanup of documents with rich metadata.
|
||||
# The task properly handles complex document structures and metadata fields.
|
||||
|
||||
Reference in New Issue
Block a user