Files
oh-my-claudecode/test-routing.mjs

179 lines
7.3 KiB
JavaScript

/**
* Test script for model routing
*
* Tests the PROACTIVE routing approach where the orchestrator (Opus)
* analyzes task complexity upfront and delegates to the right model.
*/
import {
routeTask,
getModelForTask,
analyzeTaskComplexity,
adaptPromptForTier,
quickTierForAgent,
explainRouting,
extractAllSignals,
calculateComplexityScore,
TIER_MODELS,
} from './dist/features/model-routing/index.js';
console.log('=== Model Routing Test Suite ===\n');

/**
 * Test 1 fixtures: each entry pairs a task prompt and an agent type with the
 * tier that routeTask() is expected to return for that combination.
 */
const testCases = [
  // LOW tier - simple searches
  { prompt: 'Find all .ts files in src/', agent: 'explore', expectedTier: 'LOW' },
  { prompt: 'Where is the config file?', agent: 'explore', expectedTier: 'LOW' },
  { prompt: 'List all functions in utils.ts', agent: 'explore', expectedTier: 'LOW' },
  // MEDIUM tier - standard implementation
  { prompt: 'Add a new button component with hover state', agent: 'designer', expectedTier: 'MEDIUM' },
  { prompt: 'Update the user list component to show email addresses', agent: 'executor', expectedTier: 'MEDIUM' },
  // HIGH tier - risky refactoring (detected via keywords)
  { prompt: 'Refactor the user service to use the new database schema and add migrations', agent: 'executor', expectedTier: 'HIGH' },
  // LOW tier - short or writer tasks
  { prompt: 'Write documentation for the API endpoints', agent: 'writer', expectedTier: 'LOW' },
  { prompt: 'Implement the user profile page', agent: 'executor', expectedTier: 'LOW' },
  // HIGH tier - complex tasks
  { prompt: 'Analyze the root cause of the authentication bug affecting production users', agent: 'architect', expectedTier: 'HIGH' },
  { prompt: 'Design the architecture for a new microservices system with event sourcing', agent: 'architect', expectedTier: 'HIGH' },
  { prompt: 'Refactor the entire API layer to use dependency injection pattern', agent: 'planner', expectedTier: 'HIGH' },
  { prompt: 'Debug the critical security vulnerability in the payment system', agent: 'architect', expectedTier: 'HIGH' },
];

console.log('--- Test 1: Basic Routing ---\n');

// Running tallies of cases whose routed tier did / did not match the expectation.
let passed = 0;
let failed = 0;

for (const test of testCases) {
  const decision = routeTask({
    taskPrompt: test.prompt,
    agentType: test.agent,
  });

  const isMatch = decision.tier === test.expectedTier;
  const status = isMatch ? '✓' : '✗';
  // ANSI green for a match, red for a mismatch; reset with \x1b[0m below.
  const color = isMatch ? '\x1b[32m' : '\x1b[31m';
  // Only mark the prompt as truncated when characters were actually cut off.
  const preview = test.prompt.length > 50
    ? `${test.prompt.substring(0, 50)}...`
    : test.prompt;

  console.log(`${color}${status}\x1b[0m [${decision.tier}] ${test.agent}: "${preview}"`);
  console.log(` Model: ${decision.model}`);
  console.log(` Confidence: ${(decision.confidence * 100).toFixed(0)}%`);
  console.log(` Reasons: ${decision.reasons.join(', ')}`);

  if (isMatch) {
    passed++;
  } else {
    failed++;
    // Printed before the blank separator so it stays attached to its own case.
    console.log(` Expected: ${test.expectedTier}, Got: ${decision.tier}`);
  }
  console.log('');
}

console.log(`\n--- Results: ${passed}/${testCases.length} passed ---\n`);

// Surface mismatches to the caller (CI, npm scripts) without aborting the
// remaining test sections: the process exits non-zero once the script ends.
if (failed > 0) {
  process.exitCode = 1;
}
console.log('--- Test 2: Agent Quick Tier Lookup ---\n');

// Agent types whose tier is looked up by agent name alone (no prompt is passed).
const agents = ['architect', 'planner', 'critic', 'explore', 'writer', 'designer', 'executor'];

for (const agent of agents) {
  const tier = quickTierForAgent(agent);
  // The tier is used as the key into TIER_MODELS. Separate the two values with
  // an arrow; without a separator they were printed fused into one token.
  console.log(` ${agent}: ${tier} → ${TIER_MODELS[tier]}`);
}
console.log('\n--- Test 3: Proactive Model Selection (getModelForTask) ---\n');

/**
 * Test 3 fixtures: each entry pairs an agent type and a prompt with the model
 * that getModelForTask() is expected to select.
 */
const modelTestCases = [
  // Worker agents - adaptive based on task
  { agent: 'executor', prompt: 'Fix this typo in the README', expectedModel: 'haiku' },
  { agent: 'executor', prompt: 'Refactor payment system with migration scripts for production data', expectedModel: 'opus' },
  // Architect - adaptive: lookup → haiku, complex → opus
  { agent: 'architect', prompt: 'Where is the auth middleware configured?', expectedModel: 'haiku' },
  { agent: 'architect', prompt: 'Debug this race condition in the payment system', expectedModel: 'opus' },
  // Planner - adaptive: simple → haiku, strategic → opus
  { agent: 'planner', prompt: 'List the steps to add a button', expectedModel: 'haiku' },
  { agent: 'planner', prompt: 'Design the migration strategy for our monolith to microservices with risk analysis', expectedModel: 'opus' },
  // Explore - adaptive (not fixed to haiku anymore)
  { agent: 'explore', prompt: 'Find all .ts files', expectedModel: 'haiku' },
  // Reviewer agents can still route down for simple prompts
  { agent: 'critic', prompt: 'Simple task', expectedModel: 'haiku' },
];

console.log('Orchestrator proactively selects model based on task complexity:\n');

// Number of cases whose selected model matched the expectation.
let modelPassed = 0;

for (const test of modelTestCases) {
  const result = getModelForTask(test.agent, test.prompt);

  const isMatch = result.model === test.expectedModel;
  const status = isMatch ? '✓' : '✗';
  // ANSI green for a match, red for a mismatch; reset with \x1b[0m below.
  const color = isMatch ? '\x1b[32m' : '\x1b[31m';
  // Only mark the prompt as truncated when characters were actually cut off.
  const preview = test.prompt.length > 40
    ? `${test.prompt.substring(0, 40)}...`
    : test.prompt;

  console.log(`${color}${status}\x1b[0m ${test.agent} + "${preview}"`);
  console.log(` → model: ${result.model} (${result.tier})`);
  console.log(` → reason: ${result.reason}`);

  if (isMatch) {
    modelPassed++;
  } else {
    // Show what was expected so a red ✗ is actionable on its own.
    console.log(` Expected: ${test.expectedModel}, Got: ${result.model}`);
  }
  console.log('');
}

console.log(`--- Results: ${modelPassed}/${modelTestCases.length} passed ---\n`);

// Surface mismatches to the caller without aborting the remaining sections:
// the process exits non-zero once the script ends.
if (modelPassed < modelTestCases.length) {
  process.exitCode = 1;
}
console.log('--- Test 4: Prompt Adaptation ---\n');

// One fixed prompt is adapted for every tier so the outputs can be compared.
const samplePrompt = 'Implement user authentication with JWT tokens';
console.log('Original prompt:', samplePrompt);
console.log('\nAdapted for each tier:\n');

['LOW', 'MEDIUM', 'HIGH'].forEach((tierName) => {
  console.log(`=== ${tierName} tier ===`);

  const adaptedPrompt = adaptPromptForTier(samplePrompt, tierName);
  // Show at most the first 300 characters; flag the cut with an ellipsis.
  const head = adaptedPrompt.substring(0, 300);
  const tail = adaptedPrompt.length > 300 ? '...' : '';
  console.log(`${head}${tail}`);

  console.log('');
});
console.log('--- Test 5: Signal Extraction ---\n');

// Multi-line prompt fed to the signal extractor below; the same text is also
// passed to explainRouting() in Test 6.
const complexPrompt = `
Analyze the production authentication system across multiple services.
The bug affects user login, session management, and API authorization.
We need to understand the root cause and design a fix that handles:
1. Race conditions in token refresh
2. Session invalidation across microservices
3. Backwards compatibility with existing clients
This is critical and urgent - users are being logged out randomly.
`;

console.log('Complex prompt signals:');

// Extract the signals for the 'architect' agent and dump them as indented JSON.
const extractedSignals = extractAllSignals(complexPrompt, 'architect');
const signalsJson = JSON.stringify(extractedSignals, null, 2);
console.log(signalsJson);

// Reduce the extracted signals to a single score, shown with two decimals.
const complexityScore = calculateComplexityScore(extractedSignals);
const formattedScore = complexityScore.toFixed(2);
console.log(`\nComplexity score: ${formattedScore}`);
console.log('\n--- Test 6: Routing Explanation ---\n');

// Ask for the routing explanation of the Test 5 prompt, routed as 'architect',
// and print whatever explainRouting() returns.
const routingRequest = {
  taskPrompt: complexPrompt,
  agentType: 'architect',
};
console.log(explainRouting(routingRequest));
console.log('\n--- Test 7: Complexity Analysis Helper ---\n');

const analysisPrompt = 'Refactor the payment processing module to support multiple payment providers and add migration scripts for existing transactions';
const analysis = analyzeTaskComplexity(analysisPrompt, 'executor');

// Echo only the first 60 characters of the prompt, followed by an ellipsis.
console.log('Task:', `${analysisPrompt.substring(0, 60)}...`);

console.log('\nAnalysis Result:');
console.log(analysis.analysis);

console.log('\nKey Signals:');
// Display label paired with the property read from analysis.signals, in print order.
const signalFields = [
  ['Word count', 'wordCount'],
  ['Architecture keywords', 'hasArchitectureKeywords'],
  ['Risk keywords', 'hasRiskKeywords'],
  ['Estimated subtasks', 'estimatedSubtasks'],
  ['Impact scope', 'impactScope'],
];
const { signals: keySignals } = analysis;
for (const [label, field] of signalFields) {
  console.log(` - ${label}: ${keySignals[field]}`);
}
// Closing banner and summary, printed one line per console.log call.
const closingLines = [
  '\n=== All Tests Complete ===',
  '\nSUMMARY: Routing is now PROACTIVE - orchestrator (Opus) analyzes',
  'complexity upfront and delegates with the appropriate model parameter.',
];
for (const line of closingLines) {
  console.log(line);
}