{"categories":[{"name":"Issue Resolution","description":"Fixing bugs in real GitHub issues (SWE-Bench)","benchmarks":["swe-bench"]},{"name":"Frontend","description":"UI development with visual context (SWE-Bench Multimodal)","benchmarks":["swe-bench-multimodal"]},{"name":"Greenfield","description":"Building new applications from scratch (Commit0)","benchmarks":["commit0"]},{"name":"Testing","description":"Test generation and quality (SWT-Bench)","benchmarks":["swt-bench"]},{"name":"Information Gathering","description":"Research and information retrieval (GAIA)","benchmarks":["gaia"]}]}