Files
oh-my-claudecode/benchmark/.env.example
JunghwanNA 8cb1dae394 fix(permission-handler): remove dead code and add swarm marker support (#144) (#157)
fix(permission-handler): remove dead code and add swarm marker support
2026-01-27 23:24:02 +09:00

37 lines
1.1 KiB
Plaintext

# SWE-bench Evaluation Configuration
# Copy this file to .env and fill in your values
# Required: Anthropic authentication token for Claude Code
ANTHROPIC_AUTH_TOKEN=your_token_here
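# Optional: Custom Anthropic API base URL (e.g. a proxy or gateway).
# NOTE: the value below is only a placeholder. Requests (presumably including the
# auth token above) go to this host, so replace it with an endpoint you trust,
# or comment the line out if you do not need a custom endpoint.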
ANTHROPIC_BASE_URL=https://api.layofflabs.com
# Run mode: 'vanilla' for standard Claude Code, 'omc' for Claude Code enhanced with oh-my-claudecode
RUN_MODE=vanilla
# Maximum parallel workers for evaluation
MAX_WORKERS=4
# Dataset to evaluate against
# Options:
# - princeton-nlp/SWE-bench_Verified (500 human-validated instances, recommended)
# - princeton-nlp/SWE-bench_Lite (300 instances, easier subset)
# - princeton-nlp/SWE-bench (full 2294 instances)
DATASET=princeton-nlp/SWE-bench_Verified
# Optional: Subset of instances to run (comma-separated instance IDs)
# INSTANCE_IDS=django__django-11099,sympy__sympy-18057
# Optional: Maximum instances to evaluate (useful for testing)
# MAX_INSTANCES=10
# Optional: Timeout per instance in seconds (default: 1800 = 30 minutes)
# INSTANCE_TIMEOUT=1800
# Optional: Model to use (default: claude-sonnet-4-20250514)
# MODEL=claude-sonnet-4-20250514
# Optional: Enable verbose logging
# VERBOSE=true