# SWE-bench Evaluation Configuration
# Copy this file to .env and fill in your values

# Required: Anthropic authentication token for Claude Code
ANTHROPIC_AUTH_TOKEN=your_token_here

# Optional: Custom Anthropic API base URL
ANTHROPIC_BASE_URL=https://api.layofflabs.com

# Run mode: 'vanilla' for standard Claude Code, 'omc' for oh-my-claudecode enhanced
RUN_MODE=vanilla

# Maximum parallel workers for evaluation
MAX_WORKERS=4

# Dataset to evaluate against
# Options:
# - princeton-nlp/SWE-bench_Verified (500 curated instances, recommended)
# - princeton-nlp/SWE-bench_Lite (300 instances, easier subset)
# - princeton-nlp/SWE-bench (full 2294 instances)
DATASET=princeton-nlp/SWE-bench_Verified

# Optional: Subset of instances to run (comma-separated instance IDs)
# INSTANCE_IDS=django__django-11099,sympy__sympy-18057

# Optional: Maximum instances to evaluate (useful for testing)
# MAX_INSTANCES=10

# Optional: Timeout per instance in seconds (default: 1800 = 30 minutes)
# INSTANCE_TIMEOUT=1800

# Optional: Model to use (default: claude-sonnet-4-20250514)
# MODEL=claude-sonnet-4-20250514

# Optional: Enable verbose logging
# VERBOSE=true