From b15d9b04ae42d0707c19748f529927fdf47e7434 Mon Sep 17 00:00:00 2001 From: FFXN Date: Tue, 27 Jan 2026 15:53:47 +0800 Subject: [PATCH 1/2] fix: summary tokens. --- .../processor/paragraph_index_processor.py | 24 +++++++-- .../processor/parent_child_index_processor.py | 4 +- .../knowledge_index/knowledge_index_node.py | 4 +- api/models/dataset.py | 1 + api/services/summary_index_service.py | 51 ++++++++++++++++--- 5 files changed, 70 insertions(+), 14 deletions(-) diff --git a/api/core/rag/index_processor/processor/paragraph_index_processor.py b/api/core/rag/index_processor/processor/paragraph_index_processor.py index 0bf1b1e30a..f78156de49 100644 --- a/api/core/rag/index_processor/processor/paragraph_index_processor.py +++ b/api/core/rag/index_processor/processor/paragraph_index_processor.py @@ -18,6 +18,7 @@ from core.model_runtime.entities.message_entities import ( TextPromptMessageContent, UserPromptMessage, ) +from core.model_runtime.entities.llm_entities import LLMUsage from core.model_runtime.entities.model_entities import ModelFeature, ModelType from core.provider_manager import ProviderManager from core.rag.cleaner.clean_processor import CleanProcessor @@ -295,11 +296,11 @@ class ParagraphIndexProcessor(BaseIndexProcessor): if flask_app: # Ensure Flask app context in worker thread with flask_app.app_context(): - summary = self.generate_summary(tenant_id, preview.content, summary_index_setting) + summary, _ = self.generate_summary(tenant_id, preview.content, summary_index_setting) preview.summary = summary else: # Fallback: try without app context (may fail) - summary = self.generate_summary(tenant_id, preview.content, summary_index_setting) + summary, _ = self.generate_summary(tenant_id, preview.content, summary_index_setting) preview.summary = summary # Generate summaries concurrently using ThreadPoolExecutor @@ -356,7 +357,7 @@ class ParagraphIndexProcessor(BaseIndexProcessor): text: str, summary_index_setting: dict | None = None, segment_id: str | None = None, - ) -> str: + ) -> tuple[str, LLMUsage]: """ Generate summary for the given text using ModelInstance.invoke_llm and the default or custom summary prompt, and supports vision models by including images from the segment attachments or text content. @@ -366,6 +367,9 @@ class ParagraphIndexProcessor(BaseIndexProcessor): text: Text content to summarize summary_index_setting: Summary index configuration segment_id: Optional segment ID to fetch attachments from SegmentAttachmentBinding table + + Returns: + Tuple of (summary_content, llm_usage) where llm_usage is LLMUsage object """ if not summary_index_setting or not summary_index_setting.get("enable"): raise ValueError("summary_index_setting is required and must be enabled to generate summary.") @@ -432,7 +436,19 @@ class ParagraphIndexProcessor(BaseIndexProcessor): result = model_instance.invoke_llm(prompt_messages=prompt_messages, model_parameters={}, stream=False) - return getattr(result.message, "content", "") + summary_content = getattr(result.message, "content", "") + usage = result.usage + + # Deduct quota for summary generation (same as workflow nodes) + from core.workflow.nodes.llm import llm_utils + + try: + llm_utils.deduct_llm_quota(tenant_id=tenant_id, model_instance=model_instance, usage=usage) + except Exception as e: + # Log but don't fail summary generation if quota deduction fails + logger.warning("Failed to deduct quota for summary generation: %s", str(e)) + + return summary_content, usage @staticmethod def _extract_images_from_text(tenant_id: str, text: str) -> list[File]: diff --git a/api/core/rag/index_processor/processor/parent_child_index_processor.py b/api/core/rag/index_processor/processor/parent_child_index_processor.py index 8c803621b8..4cba5e230c 100644 --- a/api/core/rag/index_processor/processor/parent_child_index_processor.py +++ b/api/core/rag/index_processor/processor/parent_child_index_processor.py @@ -382,7 +382,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor): if flask_app: # Ensure Flask app context in worker thread with flask_app.app_context(): - summary = ParagraphIndexProcessor.generate_summary( + summary, _ = ParagraphIndexProcessor.generate_summary( tenant_id=tenant_id, text=preview.content, summary_index_setting=summary_index_setting, @@ -390,7 +390,7 @@ class ParentChildIndexProcessor(BaseIndexProcessor): preview.summary = summary else: # Fallback: try without app context (may fail) - summary = ParagraphIndexProcessor.generate_summary( + summary, _ = ParagraphIndexProcessor.generate_summary( tenant_id=tenant_id, text=preview.content, summary_index_setting=summary_index_setting, diff --git a/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py b/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py index 247c143e82..fd1bf17659 100644 --- a/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py +++ b/api/core/workflow/nodes/knowledge_index/knowledge_index_node.py @@ -364,7 +364,7 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]): # Set Flask application context in worker thread if flask_app: with flask_app.app_context(): - summary = ParagraphIndexProcessor.generate_summary( + summary, _ = ParagraphIndexProcessor.generate_summary( tenant_id=dataset.tenant_id, text=preview_item["content"], summary_index_setting=summary_index_setting, @@ -373,7 +373,7 @@ class KnowledgeIndexNode(Node[KnowledgeIndexNodeData]): preview_item["summary"] = summary else: # Fallback: try without app context (may fail) - summary = ParagraphIndexProcessor.generate_summary( + summary, _ = ParagraphIndexProcessor.generate_summary( tenant_id=dataset.tenant_id, text=preview_item["content"], summary_index_setting=summary_index_setting, diff --git a/api/models/dataset.py b/api/models/dataset.py index d7ccee5829..8368e4cd2e 100644 --- a/api/models/dataset.py +++ b/api/models/dataset.py @@ -1597,6 +1597,7 @@ class DocumentSegmentSummary(Base): summary_content: Mapped[str] = mapped_column(LongText, nullable=True) summary_index_node_id: Mapped[str] = mapped_column(String(255), nullable=True) summary_index_node_hash: Mapped[str] = mapped_column(String(255), nullable=True) + tokens: Mapped[int | None] = mapped_column(sa.Integer, nullable=True) status: Mapped[str] = mapped_column(String(32), nullable=False, server_default=sa.text("'generating'")) error: Mapped[str] = mapped_column(LongText, nullable=True) enabled: Mapped[bool] = mapped_column(sa.Boolean, nullable=False, server_default=sa.text("true")) diff --git a/api/services/summary_index_service.py b/api/services/summary_index_service.py index fc3efd97cc..49e546475f 100644 --- a/api/services/summary_index_service.py +++ b/api/services/summary_index_service.py @@ -4,7 +4,11 @@ import logging import time import uuid from datetime import UTC, datetime +from typing import Any +from core.model_manager import ModelManager +from core.model_runtime.entities.llm_entities import LLMUsage +from core.model_runtime.entities.model_entities import ModelType from core.rag.datasource.vdb.vector_factory import Vector from core.rag.index_processor.constant.doc_type import DocType from core.rag.models.document import Document @@ -24,7 +28,7 @@ class SummaryIndexService: segment: DocumentSegment, dataset: Dataset, summary_index_setting: dict, - ) -> str: + ) -> tuple[str, LLMUsage]: """ Generate summary for a single segment. @@ -34,7 +38,7 @@ class SummaryIndexService: summary_index_setting: Summary index configuration Returns: - Generated summary text + Tuple of (summary_content, llm_usage) where llm_usage is LLMUsage object Raises: ValueError: If summary_index_setting is invalid or generation fails @@ -43,7 +47,7 @@ class SummaryIndexService: # Use lazy import to avoid circular import from core.rag.index_processor.processor.paragraph_index_processor import ParagraphIndexProcessor - summary_content = ParagraphIndexProcessor.generate_summary( + summary_content, usage = ParagraphIndexProcessor.generate_summary( tenant_id=dataset.tenant_id, text=segment.content, summary_index_setting=summary_index_setting, @@ -53,7 +57,7 @@ class SummaryIndexService: if not summary_content: raise ValueError("Generated summary is empty") - return summary_content + return summary_content, usage @staticmethod def create_summary_record( @@ -153,6 +157,22 @@ class SummaryIndexService: str(e), ) + # Calculate embedding tokens for summary (for logging and statistics) + embedding_tokens = 0 + try: + model_manager = ModelManager() + embedding_model = model_manager.get_model_instance( + tenant_id=dataset.tenant_id, + provider=dataset.embedding_model_provider, + model_type=ModelType.TEXT_EMBEDDING, + model=dataset.embedding_model, + ) + if embedding_model: + tokens_list = embedding_model.get_text_embedding_num_tokens([summary_record.summary_content]) + embedding_tokens = tokens_list[0] if tokens_list else 0 + except Exception as e: + logger.warning("Failed to calculate embedding tokens for summary: %s", str(e)) + # Create document with summary content and metadata summary_document = Document( page_content=summary_record.summary_content, @@ -179,9 +199,18 @@ class SummaryIndexService: # we still want to re-vectorize (upsert will overwrite) vector.add_texts([summary_document], duplicate_check=False) + # Log embedding token usage + if embedding_tokens > 0: + logger.info( + "Summary embedding for segment %s used %s tokens", + segment.id, + embedding_tokens, + ) + # Success - update summary record with index node info summary_record.summary_index_node_id = summary_index_node_id summary_record.summary_index_node_hash = summary_hash + summary_record.tokens = embedding_tokens # Save embedding tokens summary_record.status = "completed" # Explicitly update updated_at to ensure it's refreshed even if other fields haven't changed summary_record.updated_at = datetime.now(UTC).replace(tzinfo=None) @@ -364,14 +393,24 @@ class SummaryIndexService: db.session.add(summary_record) db.session.flush() - # Generate summary - summary_content = SummaryIndexService.generate_summary_for_segment( + # Generate summary (returns summary_content and llm_usage) + summary_content, llm_usage = SummaryIndexService.generate_summary_for_segment( segment, dataset, summary_index_setting ) # Update summary content summary_record.summary_content = summary_content + # Log LLM usage for summary generation + if llm_usage and llm_usage.total_tokens > 0: + logger.info( + "Summary generation for segment %s used %s tokens (prompt: %s, completion: %s)", + segment.id, + llm_usage.total_tokens, + llm_usage.prompt_tokens, + llm_usage.completion_tokens, + ) + # Vectorize summary (will delete old vector if exists before creating new one) SummaryIndexService.vectorize_summary(summary_record, segment, dataset) From e1cb37e9672aea7646e26e36fa314a33e5f5446c Mon Sep 17 00:00:00 2001 From: FFXN Date: Tue, 27 Jan 2026 16:11:09 +0800 Subject: [PATCH 2/2] fix: summary tokens. --- .../rag/index_processor/processor/paragraph_index_processor.py | 2 +- api/services/summary_index_service.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/api/core/rag/index_processor/processor/paragraph_index_processor.py b/api/core/rag/index_processor/processor/paragraph_index_processor.py index f78156de49..392dfda0ba 100644 --- a/api/core/rag/index_processor/processor/paragraph_index_processor.py +++ b/api/core/rag/index_processor/processor/paragraph_index_processor.py @@ -12,13 +12,13 @@ from core.entities.knowledge_entities import PreviewDetail from core.file import File, FileTransferMethod, FileType, file_manager from core.llm_generator.prompts import DEFAULT_GENERATOR_SUMMARY_PROMPT from core.model_manager import ModelInstance +from core.model_runtime.entities.llm_entities import LLMUsage from core.model_runtime.entities.message_entities import ( ImagePromptMessageContent, PromptMessageContentUnionTypes, TextPromptMessageContent, UserPromptMessage, ) -from core.model_runtime.entities.llm_entities import LLMUsage from core.model_runtime.entities.model_entities import ModelFeature, ModelType from core.provider_manager import ProviderManager from core.rag.cleaner.clean_processor import CleanProcessor diff --git a/api/services/summary_index_service.py b/api/services/summary_index_service.py index 49e546475f..e592e2e8ef 100644 --- a/api/services/summary_index_service.py +++ b/api/services/summary_index_service.py @@ -4,7 +4,6 @@ import logging import time import uuid from datetime import UTC, datetime -from typing import Any from core.model_manager import ModelManager from core.model_runtime.entities.llm_entities import LLMUsage