feat(web): rag-pipeline evaluation configuration

Author: JzoNg
Date: 2026-04-12 10:36:20 +08:00
Parent: e4c056a57a
Commit: 627fbd2e86
6 changed files with 295 additions and 14 deletions

View File

@@ -11,6 +11,7 @@ const mockUseEvaluationConfig = vi.hoisted(() => vi.fn())
const mockUseEvaluationNodeInfoMutation = vi.hoisted(() => vi.fn())
const mockUseSaveEvaluationConfigMutation = vi.hoisted(() => vi.fn())
const mockUseStartEvaluationRunMutation = vi.hoisted(() => vi.fn())
const mockUsePublishedPipelineInfo = vi.hoisted(() => vi.fn())
vi.mock('@/app/components/header/account-setting/model-provider-page/hooks', () => ({
useModelList: () => ({
@@ -55,6 +56,15 @@ vi.mock('@/service/use-evaluation', () => ({
useStartEvaluationRunMutation: (...args: unknown[]) => mockUseStartEvaluationRunMutation(...args),
}))
vi.mock('@/service/use-pipeline', () => ({
usePublishedPipelineInfo: (...args: unknown[]) => mockUsePublishedPipelineInfo(...args),
}))
vi.mock('@/context/dataset-detail', () => ({
useDatasetDetailContextWithSelector: (selector: (state: { dataset: { pipeline_id: string } }) => unknown) =>
selector({ dataset: { pipeline_id: 'pipeline-1' } }),
}))
vi.mock('@/service/use-workflow', () => ({
useAppWorkflow: () => ({
data: {
@@ -152,6 +162,20 @@ describe('Evaluation', () => {
isPending: false,
mutate: vi.fn(),
})
mockUsePublishedPipelineInfo.mockReturnValue({
data: {
graph: {
nodes: [{
id: 'knowledge-node',
data: {
type: 'knowledge-index',
title: 'Knowledge Base',
},
}],
edges: [],
},
},
})
mockUpload.mockResolvedValue({
id: 'uploaded-file-id',
name: 'evaluation.csv',
@@ -471,10 +495,19 @@ describe('Evaluation', () => {
default_metrics: [{
metric: 'context-precision',
value_type: 'number',
- node_info_list: [],
+ node_info_list: [
+   { node_id: 'knowledge-node', title: 'Knowledge Base', type: 'knowledge-index' },
+ ],
}],
customized_metrics: null,
- judgment_config: null,
+ judgment_config: {
+   logical_operator: 'and',
+   conditions: [{
+     variable_selector: ['knowledge-node', 'context-precision'],
+     comparison_operator: '≥',
+     value: '0.85',
+   }],
+ },
file_id: 'file-1',
},
}, {

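A note on the fixtures above: each judgment condition pairs the knowledge-index node id with a metric id in variable_selector, and the threshold travels as a string. A minimal sketch of the shapes involved, inferred from these fixtures alone (the real declarations live in @/types/evaluation and may differ):

// Sketch only: inferred from the test fixtures above, not copied from
// @/types/evaluation; the real fields may be typed more narrowly.
type NodeInfo = {
  node_id: string // e.g. 'knowledge-node'
  title: string // e.g. 'Knowledge Base'
  type: string // 'knowledge-index' for pipeline resources
}
type EvaluationJudgmentCondition = {
  variable_selector: [string, string] // [node id, metric id]
  comparison_operator: string // the pipeline flow always emits '≥'
  value: string // threshold serialized as a string, e.g. '0.85'
}
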
View File

@@ -338,8 +338,117 @@ describe('evaluation store', () => {
},
}
- expect(buildEvaluationConfigPayload(resource)).toEqual(expectedPayload)
- expect(buildEvaluationRunRequest(resource, 'file-1')).toEqual({
+ expect(buildEvaluationConfigPayload(resource, resourceType)).toEqual(expectedPayload)
+ expect(buildEvaluationRunRequest(resource, 'file-1', resourceType)).toEqual({
...expectedPayload,
file_id: 'file-1',
})
})
it('should hydrate pipeline metrics from fixed knowledge-index conditions', () => {
const resourceType = 'datasets'
const resourceId = 'dataset-hydrate'
const store = useEvaluationStore.getState()
const config: EvaluationConfig = {
evaluation_model: 'gpt-4o-mini',
evaluation_model_provider: 'openai',
default_metrics: [{
metric: 'context-precision',
node_info_list: [
{ node_id: 'knowledge-node', title: 'Knowledge Base', type: 'knowledge-index' },
],
}],
customized_metrics: {
evaluation_workflow_id: 'should-be-ignored',
input_fields: {
query: 'answer',
},
output_fields: [{
variable: 'score',
value_type: 'number',
}],
},
judgment_config: {
logical_operator: 'or',
conditions: [{
variable_selector: ['knowledge-node', 'context-precision'],
comparison_operator: '≥',
value: '0.92',
}],
},
}
store.hydrateResource(resourceType, resourceId, config)
const hydratedState = useEvaluationStore.getState().resources['datasets:dataset-hydrate']
expect(hydratedState.judgeModelId).toBe('openai::gpt-4o-mini')
expect(hydratedState.metrics).toHaveLength(1)
expect(hydratedState.metrics[0]).toMatchObject({
optionId: 'context-precision',
kind: 'builtin',
valueType: 'number',
threshold: 0.92,
nodeInfoList: [
{ node_id: 'knowledge-node', title: 'Knowledge Base', type: 'knowledge-index' },
],
})
})
it('should build pipeline judgment payload from metric thresholds', () => {
const resourceType = 'datasets'
const resourceId = 'dataset-save-config'
const store = useEvaluationStore.getState()
const knowledgeNodeInfo = [{ node_id: 'knowledge-node', title: 'Knowledge Base', type: 'knowledge-index' }]
store.ensureResource(resourceType, resourceId)
store.setJudgeModel(resourceType, resourceId, 'openai::gpt-4o-mini')
store.addBuiltinMetric(resourceType, resourceId, 'context-precision', knowledgeNodeInfo)
store.addBuiltinMetric(resourceType, resourceId, 'context-recall', knowledgeNodeInfo)
const resourceWithMetrics = useEvaluationStore.getState().resources['datasets:dataset-save-config']
const contextPrecisionMetric = resourceWithMetrics.metrics.find(metric => metric.optionId === 'context-precision')!
const contextRecallMetric = resourceWithMetrics.metrics.find(metric => metric.optionId === 'context-recall')!
store.updateMetricThreshold(resourceType, resourceId, contextPrecisionMetric.id, 0.91)
store.updateMetricThreshold(resourceType, resourceId, contextRecallMetric.id, 0.88)
const resource = useEvaluationStore.getState().resources['datasets:dataset-save-config']
const expectedPayload = {
evaluation_model: 'gpt-4o-mini',
evaluation_model_provider: 'openai',
default_metrics: [
{
metric: 'context-precision',
value_type: 'number',
node_info_list: knowledgeNodeInfo,
},
{
metric: 'context-recall',
value_type: 'number',
node_info_list: knowledgeNodeInfo,
},
],
customized_metrics: null,
judgment_config: {
logical_operator: 'and',
conditions: [
{
variable_selector: ['knowledge-node', 'context-precision'],
comparison_operator: '≥',
value: '0.91',
},
{
variable_selector: ['knowledge-node', 'context-recall'],
comparison_operator: '≥',
value: '0.88',
},
],
},
}
expect(buildEvaluationConfigPayload(resource, resourceType)).toEqual(expectedPayload)
expect(buildEvaluationRunRequest(resource, 'file-1', resourceType)).toEqual({
...expectedPayload,
file_id: 'file-1',
})

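The two tests above are the two halves of a round trip: hydration maps saved '≥' conditions back onto per-metric thresholds, and payload building regenerates one condition per builtin metric. A minimal sketch of that property, assuming buildStateFromEvaluationConfig is exported alongside the payload builders (as the store diff below suggests) and savedConfig is a pipeline config with only builtin metrics:

// Sketch only: the round-trip property implied by the two tests above.
const hydrated = buildStateFromEvaluationConfig('datasets', savedConfig)
const rebuilt = buildEvaluationConfigPayload(hydrated, 'datasets')
// rebuilt?.judgment_config?.conditions should reproduce savedConfig's '≥'
// conditions, with each value re-serialized from the hydrated threshold.
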
View File

@@ -36,7 +36,7 @@ const BatchTestPanel = ({
return
}
- const body = buildEvaluationConfigPayload(resource)
+ const body = buildEvaluationConfigPayload(resource, resourceType)
if (!body) {
toast.warning(t('batch.validation'))

View File

@@ -102,7 +102,7 @@ export const useInputFieldsActions = ({
return
}
- const body = buildEvaluationRunRequest(resource, uploadedFileId)
+ const body = buildEvaluationRunRequest(resource, uploadedFileId, resourceType)
if (!body) {
toast.warning(t('batch.validation'))

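Both call sites above now thread resourceType through; because the parameter is optional, an omitted argument silently falls back to the isPipelineResourceState heuristic in the store (last file below). A hypothetical way to bind it once, not part of this commit:

// Sketch only: hypothetical helper binding resourceType so call sites
// cannot forget it; the builder names mirror the store's exports.
const makePayloadBuilders = (resourceType: EvaluationResourceType) => ({
  config: (resource: EvaluationResourceState) =>
    buildEvaluationConfigPayload(resource, resourceType),
  run: (resource: EvaluationResourceState, fileId: string) =>
    buildEvaluationRunRequest(resource, fileId, resourceType),
})
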
View File

@@ -1,25 +1,62 @@
'use client'
import type { EvaluationResourceProps } from '../../types'
- import { useMemo } from 'react'
+ import type { Node } from '@/app/components/workflow/types'
+ import type { NodeInfo } from '@/types/evaluation'
+ import { useEffect, useMemo } from 'react'
import { useTranslation } from 'react-i18next'
import { BlockEnum } from '@/app/components/workflow/types'
import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
import { useAvailableEvaluationMetrics } from '@/service/use-evaluation'
import { usePublishedPipelineInfo } from '@/service/use-pipeline'
import { getEvaluationMockConfig } from '../../mock'
import { useEvaluationResource, useEvaluationStore } from '../../store'
import { InlineSectionHeader } from '../section-header'
import PipelineMetricItem from './pipeline-metric-item'
const getKnowledgeIndexNodeInfo = (nodes: Node[] | undefined): NodeInfo[] => {
const knowledgeIndexNode = nodes?.find(node => node.data.type === BlockEnum.KnowledgeBase)
if (!knowledgeIndexNode?.id)
return []
return [{
node_id: knowledgeIndexNode.id,
title: typeof knowledgeIndexNode.data?.title === 'string' && knowledgeIndexNode.data.title
? knowledgeIndexNode.data.title
: knowledgeIndexNode.id,
type: 'knowledge-index',
}]
}
const isSameNodeInfoList = (left: NodeInfo[] | undefined, right: NodeInfo[]) => {
if ((left?.length ?? 0) !== right.length)
return false
return (left ?? []).every((nodeInfo, index) => {
const target = right[index]
return nodeInfo.node_id === target?.node_id
&& nodeInfo.title === target?.title
&& nodeInfo.type === target?.type
})
}
const PipelineMetricsSection = ({
resourceType,
resourceId,
}: EvaluationResourceProps) => {
const { t } = useTranslation('evaluation')
const pipelineId = useDatasetDetailContextWithSelector(state => state.dataset?.pipeline_id)
const addBuiltinMetric = useEvaluationStore(state => state.addBuiltinMetric)
const removeMetric = useEvaluationStore(state => state.removeMetric)
const updateMetricThreshold = useEvaluationStore(state => state.updateMetricThreshold)
const { data: availableMetricsData } = useAvailableEvaluationMetrics()
const { data: publishedPipeline } = usePublishedPipelineInfo(pipelineId || '')
const resource = useEvaluationResource(resourceType, resourceId)
const config = getEvaluationMockConfig(resourceType)
const knowledgeIndexNodeInfoList = useMemo(
() => getKnowledgeIndexNodeInfo(publishedPipeline?.graph.nodes),
[publishedPipeline?.graph.nodes],
)
const builtinMetricMap = useMemo(() => new Map(
resource.metrics
.filter(metric => metric.kind === 'builtin')
@@ -32,6 +69,18 @@ const PipelineMetricsSection = ({
)
}, [availableMetricIds, builtinMetricMap, config.builtinMetrics])
useEffect(() => {
if (!knowledgeIndexNodeInfoList.length)
return
resource.metrics.forEach((metric) => {
if (metric.kind !== 'builtin' || isSameNodeInfoList(metric.nodeInfoList, knowledgeIndexNodeInfoList))
return
addBuiltinMetric(resourceType, resourceId, metric.optionId, knowledgeIndexNodeInfoList)
})
}, [addBuiltinMetric, knowledgeIndexNodeInfoList, resource.metrics, resourceId, resourceType])
const handleToggleMetric = (metricId: string) => {
const selectedMetric = builtinMetricMap.get(metricId)
if (selectedMetric) {
@@ -39,7 +88,7 @@ const PipelineMetricsSection = ({
return
}
- addBuiltinMetric(resourceType, resourceId, metricId)
+ addBuiltinMetric(resourceType, resourceId, metricId, knowledgeIndexNodeInfoList)
}
return (

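To make the helper's fallback concrete, a hypothetical call (not from the diff) with an untitled knowledge-base node:

// Sketch only: illustrates getKnowledgeIndexNodeInfo's title fallback.
const nodes = [
  { id: 'kb-1', data: { type: BlockEnum.KnowledgeBase, title: '' } },
] as unknown as Node[]
getKnowledgeIndexNodeInfo(nodes)
// → [{ node_id: 'kb-1', title: 'kb-1', type: 'knowledge-index' }]
// '' is falsy, so the helper substitutes the node id for the title.
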
View File

@@ -34,6 +34,8 @@ type EvaluationStoreResources = Record<string, EvaluationResourceState>
export const DEFAULT_PIPELINE_METRIC_THRESHOLD = 0.85
const PIPELINE_LOGICAL_OPERATOR: JudgmentConfig['logicalOperator'] = 'and'
const createId = (prefix: string) => `${prefix}-${Math.random().toString(36).slice(2, 10)}`
const humanizeMetricId = (metricId: string) => {
@@ -54,6 +56,15 @@ const resolveMetricOption = (resourceType: EvaluationResourceType, metricId: str
}
}
const pipelineMetricIds = new Set(getEvaluationMockConfig('datasets').builtinMetrics.map(metric => metric.id))
const isPipelineResourceType = (resourceType: EvaluationResourceType) => resourceType === 'datasets'
const isPipelineResourceState = (resource: EvaluationResourceState) => {
return resource.metrics.length > 0
&& resource.metrics.every(metric => metric.kind === 'builtin' && pipelineMetricIds.has(metric.optionId))
}
const normalizeNodeInfoList = (value: NodeInfo[] | undefined): NodeInfo[] => {
if (!value?.length)
return []
@@ -164,6 +175,46 @@ const normalizeVariableSelector = (value: string[] | undefined): [string, string
: null
}
const getConditionNumericValue = (value: EvaluationJudgmentCondition['value']) => {
if (typeof value === 'number')
return value
if (typeof value !== 'string')
return null
const parsedValue = Number(value)
return Number.isFinite(parsedValue) ? parsedValue : null
}
const getPipelineMetricThreshold = (
metric: EvaluationMetric,
config: EvaluationConfig,
) => {
const matchingCondition = (config.judgment_config?.conditions ?? []).find((condition) => {
const variableSelector = normalizeVariableSelector(condition.variable_selector)
if (!variableSelector || variableSelector[1] !== metric.optionId || condition.comparison_operator !== '≥')
return false
if (!metric.nodeInfoList?.length)
return true
return metric.nodeInfoList.some(nodeInfo => nodeInfo.node_id === variableSelector[0])
})
return getConditionNumericValue(matchingCondition?.value) ?? metric.threshold ?? DEFAULT_PIPELINE_METRIC_THRESHOLD
}
const normalizePipelineMetrics = (
config: EvaluationConfig,
metrics: EvaluationMetric[],
) => {
return metrics.map(metric => ({
...metric,
valueType: 'number' as const,
threshold: getPipelineMetricThreshold(metric, config),
}))
}
const getNormalizedConditionValue = (
operator: ComparisonOperator,
previousValue: EvaluationJudgmentConditionValue | string | number | boolean | null | undefined,
@@ -404,8 +455,10 @@ export const buildStateFromEvaluationConfig = (
config: EvaluationConfig,
): EvaluationResourceState => {
const defaultMetrics = normalizeDefaultMetrics(resourceType, config.default_metrics)
- const customMetrics = normalizeCustomMetric(config.customized_metrics)
- const metrics = [...defaultMetrics, ...customMetrics]
+ const customMetrics = isPipelineResourceType(resourceType) ? [] : normalizeCustomMetric(config.customized_metrics)
+ const metrics = isPipelineResourceType(resourceType)
+   ? normalizePipelineMetrics(config, defaultMetrics)
+   : [...defaultMetrics, ...customMetrics]
return {
...buildInitialState(resourceType),
@@ -458,7 +511,40 @@ const buildCustomizedMetricsPayload = (metrics: EvaluationMetric[]): EvaluationC
}
}
- const buildJudgmentConfigPayload = (resource: EvaluationResourceState): EvaluationConfigData['judgment_config'] => {
const buildPipelineJudgmentConfigPayload = (
resource: EvaluationResourceState,
): EvaluationConfigData['judgment_config'] => {
const conditions = resource.metrics
.filter((metric): metric is EvaluationMetric & { kind: 'builtin' } => metric.kind === 'builtin')
.map((metric) => {
const nodeInfo = metric.nodeInfoList?.[0]
if (!nodeInfo)
return null
return {
variable_selector: [nodeInfo.node_id, metric.optionId],
comparison_operator: '≥',
value: String(metric.threshold ?? DEFAULT_PIPELINE_METRIC_THRESHOLD),
}
})
.filter((condition): condition is NonNullable<typeof condition> => !!condition)
if (!conditions.length)
return null
return {
logical_operator: PIPELINE_LOGICAL_OPERATOR,
conditions,
}
}
const buildJudgmentConfigPayload = (
resource: EvaluationResourceState,
resourceType?: EvaluationResourceType,
): EvaluationConfigData['judgment_config'] => {
if ((resourceType && isPipelineResourceType(resourceType)) || isPipelineResourceState(resource))
return buildPipelineJudgmentConfigPayload(resource)
const conditions = resource.judgmentConfig.conditions
.filter(condition => !!condition.variableSelector)
.map((condition) => {
@@ -488,6 +574,7 @@ const buildJudgmentConfigPayload = (resource: EvaluationResourceState): Evaluati
export const buildEvaluationConfigPayload = (
resource: EvaluationResourceState,
resourceType?: EvaluationResourceType,
): EvaluationConfigData | null => {
const selectedModel = decodeModelSelection(resource.judgeModelId)
@@ -504,16 +591,19 @@ export const buildEvaluationConfigPayload = (
value_type: metric.valueType,
node_info_list: metric.nodeInfoList ?? [],
})),
- customized_metrics: buildCustomizedMetricsPayload(resource.metrics),
- judgment_config: buildJudgmentConfigPayload(resource),
+ customized_metrics: (resourceType && isPipelineResourceType(resourceType)) || isPipelineResourceState(resource)
+   ? null
+   : buildCustomizedMetricsPayload(resource.metrics),
+ judgment_config: buildJudgmentConfigPayload(resource, resourceType),
}
}
export const buildEvaluationRunRequest = (
resource: EvaluationResourceState,
fileId: string,
resourceType?: EvaluationResourceType,
): EvaluationRunRequest | null => {
- const configPayload = buildEvaluationConfigPayload(resource)
+ const configPayload = buildEvaluationConfigPayload(resource, resourceType)
if (!configPayload)
return null
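
decodeModelSelection is referenced above but not shown in this diff. The tests imply judgeModelId encodes provider and model as 'provider::model' ('openai::gpt-4o-mini' becomes evaluation_model_provider: 'openai' and evaluation_model: 'gpt-4o-mini'); a hypothetical decoder consistent with that encoding:

// Sketch only: hypothetical stand-in for the real decodeModelSelection,
// matching the 'provider::model' encoding used by the tests above.
const decodeModelSelection = (judgeModelId: string | null) => {
  if (!judgeModelId)
    return null
  const [provider, model] = judgeModelId.split('::')
  return provider && model ? { provider, model } : null
}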