{"entries":[{"id":"OpenHands_v1.11.0_claude-opus-4-7","language_model":"claude-opus-4-7","sdk_version":"v1.11.0","openness":"closed","average_score":68.17999999999999,"average_cost":2.2620000000000005,"average_runtime":292.6,"categories_completed":5,"release_date":"2026-04-16","benchmarks":{"swe-bench":{"score":74.2,"cost":1.08,"runtime":183.0,"download_url":"https://results.eval.all-hands.dev/swebench/litellm_proxy-anthropic-claude-opus-4-7/24583721908/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/9464c828-db85-45f7-b4e9-b6526cb7d2f3"},"swe-bench-multimodal":{"score":48.5,"cost":2.83,"runtime":372.0,"download_url":"https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-anthropic-claude-opus-4-7/24642030719/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/cfe39593-1e41-44ba-bf2f-a29e64a51432"},"commit0":{"score":56.2,"cost":5.69,"runtime":636.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-anthropic-claude-opus-4-7/24805652683/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/825aa22d-16cb-49c1-8080-97a46db81099"},"swt-bench":{"score":80.8,"cost":0.82,"runtime":143.0,"download_url":"https://results.eval.all-hands.dev/swtbench/litellm_proxy-anthropic-claude-opus-4-7/24612239545/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/96a4370b-a553-4c91-951b-79ae3a93979d"},"gaia":{"score":81.2,"cost":0.89,"runtime":129.0,"download_url":"https://results.eval.all-hands.dev/gaia/litellm_proxy-anthropic-claude-opus-4-7/24585074284/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/734845e8-b910-414f-a4b7-ad60d464131c"}},"categories":{"Issue Resolution":{"score":74.2,"cost":1.08,"runtime":183.0},"Frontend":{"score":48.5,"cost":2.83,"runtime":372.0},"Greenfield":{"score":56.2,"cost":5.69,"runtime":636.0},"Testing":{"score":80.8,"cost":0.82,"runtime":143.0},"Information Gathering":{"score":81.2,"cost":0.89,"runtime":129.0}}},{"id":"OpenHands_v1.11.0_claude-opus-4-6","language_model":"claude-opus-4-6","sdk_version":"v1.11.0","openness":"closed","average_score":66.72,"average_cost":2.34,"average_runtime":500.6,"categories_completed":5,"release_date":"2026-02-05","benchmarks":{"swe-bench":{"score":76.8,"cost":0.77,"runtime":207.0,"download_url":"https://results.eval.all-hands.dev/swebench/litellm_proxy-anthropic-claude-opus-4-6/23560449752/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/77bb7715-dcff-4718-8e4f-611eae769107"},"swe-bench-multimodal":{"score":41.8,"cost":2.37,"runtime":602.0,"download_url":"https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-anthropic-claude-opus-4-6/21767110679/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/0fe00101-1f0a-4698-8a43-70fd665f4936"},"commit0":{"score":56.2,"cost":7.69,"runtime":1030.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-anthropic-claude-opus-4-6/24745405298/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/8a792401-ab45-48b6-8398-99a1a7534f69"},"swt-bench":{"score":78.8,"cost":0.43,"runtime":138.0,"download_url":"https://results.eval.all-hands.dev/swtbench/litellm_proxy-anthropic-claude-opus-4-6/21754233398/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/cbf7f857-21d0-4526-b830-25d4e88ab0ed"},"gaia":{"score":80.0,"cost":0.44,"runtime":526.0,"download_url":"https://results.eval.all-hands.dev/gaia/litellm_proxy-anthropic-claude-opus-4-6/21767030214/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/09286fea-ded4-4f32-a8b1-97e3a5847b83"}},"categories":{"Issue Resolution":{"score":76.8,"cost":0.77,"runtime":207.0},"Frontend":{"score":41.8,"cost":2.37,"runtime":602.0},"Greenfield":{"score":56.2,"cost":7.69,"runtime":1030.0},"Testing":{"score":78.8,"cost":0.43,"runtime":138.0},"Information Gathering":{"score":80.0,"cost":0.44,"runtime":526.0}}},{"id":"OpenHands_v1.18.1_GPT-5.5","language_model":"GPT-5.5","sdk_version":"v1.18.1","openness":"closed","average_score":65.94,"average_cost":2.31,"average_runtime":423.0,"categories_completed":5,"release_date":"2026-04-23","benchmarks":{"swe-bench":{"score":78.2,"cost":1.52,"runtime":294.0,"download_url":"https://results.eval.all-hands.dev/swebench/litellm_proxy-openai-gpt-5-5/25335148711/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/0875da38-90b0-4b49-9d66-0cbac446fd85"},"swe-bench-multimodal":{"score":38.2,"cost":2.81,"runtime":458.0,"download_url":"https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-openai-gpt-5-5/25064691625/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/55c33492-2ff8-4f59-ba0a-566bcdbb2cd4"},"commit0":{"score":43.8,"cost":5.56,"runtime":1029.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-openai-gpt-5-5/25565888384/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/bab0c2b3-3c08-442d-98e8-7f34bfd76ebd"},"swt-bench":{"score":83.4,"cost":0.92,"runtime":183.0,"download_url":"https://results.eval.all-hands.dev/swtbench/litellm_proxy-openai-gpt-5-5/25001986019/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/6eec49a3-ecfb-4081-8282-caf1e3e1dda0"},"gaia":{"score":86.1,"cost":0.74,"runtime":151.0,"download_url":"https://results.eval.all-hands.dev/gaia/litellm_proxy-openai-gpt-5-5/25064674912/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/268dd4d4-e587-449f-ba49-c2a3b6405683"}},"categories":{"Issue Resolution":{"score":78.2,"cost":1.52,"runtime":294.0},"Frontend":{"score":38.2,"cost":2.81,"runtime":458.0},"Greenfield":{"score":43.8,"cost":5.56,"runtime":1029.0},"Testing":{"score":83.4,"cost":0.92,"runtime":183.0},"Information Gathering":{"score":86.1,"cost":0.74,"runtime":151.0}}},{"id":"OpenHands_v1.13.0_GPT-5.4","language_model":"GPT-5.4","sdk_version":"v1.13.0","openness":"closed","average_score":64.28,"average_cost":1.44,"average_runtime":473.4,"categories_completed":5,"release_date":"2026-03-05","benchmarks":{"swe-bench":{"score":75.6,"cost":0.63,"runtime":284.0,"download_url":"https://results.eval.all-hands.dev/swebench/litellm_proxy-openai-gpt-5-4/24890465655/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/5f6faa00-117b-4471-b635-5d830fcac6d5"},"swe-bench-multimodal":{"score":36.8,"cost":1.45,"runtime":458.0,"download_url":"https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-openai-gpt-5-4/24796045694/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/b8e55418-a747-461b-92b8-2981612e5650"},"commit0":{"score":56.2,"cost":4.04,"runtime":1173.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-openai-gpt-5-4/25079308429/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/04c309cc-564d-465f-8e84-e0b114044609"},"swt-bench":{"score":70.4,"cost":0.47,"runtime":228.0,"download_url":"https://results.eval.all-hands.dev/swtbench/litellm_proxy-openai-gpt-5-4/24890492621/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/90cebe80-49bb-40c1-b65b-78f5e40461f9"},"gaia":{"score":82.4,"cost":0.61,"runtime":224.0,"download_url":"https://results.eval.all-hands.dev/gaia/litellm_proxy-openai-gpt-5-4/24777631212/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/9eaf668d-bfbe-4525-a0f3-d29cb6479a98"}},"categories":{"Issue Resolution":{"score":75.6,"cost":0.63,"runtime":284.0},"Frontend":{"score":36.8,"cost":1.45,"runtime":458.0},"Greenfield":{"score":56.2,"cost":4.04,"runtime":1173.0},"Testing":{"score":70.4,"cost":0.47,"runtime":228.0},"Information Gathering":{"score":82.4,"cost":0.61,"runtime":224.0}}},{"id":"OpenHands_v1.8.3_claude-opus-4-5","language_model":"claude-opus-4-5","sdk_version":"v1.8.3","openness":"closed","average_score":60.58,"average_cost":3.0799999999999996,"average_runtime":525.0,"categories_completed":5,"release_date":"2025-11-24","benchmarks":{"swe-bench":{"score":76.6,"cost":1.82,"runtime":325.0,"download_url":"https://results.eval.all-hands.dev/eval-21370451733-claude-4-5_litellm_proxy-anthropic-claude-opus-4-5-20251101_26-01-26-23-59.tar.gz","visualization_url":null},"swe-bench-multimodal":{"score":41.2,"cost":2.54,"runtime":671.0,"download_url":"https://results.eval.all-hands.dev/eval-21323385943-claude-4-5_litellm_proxy-anthropic-claude-opus-4-5-20251101_26-01-25-04-21.tar.gz","visualization_url":null},"commit0":{"score":37.5,"cost":9.11,"runtime":1264.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-anthropic-claude-opus-4-5-20251101/25079416757/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/d15f9cba-5c46-4413-b71d-f7b5d15e6efb"},"swt-bench":{"score":78.5,"cost":1.38,"runtime":268.0,"download_url":"https://results.eval.all-hands.dev/eval-21173239168-claude-4-5_litellm_proxy-anthropic-claude-opus-4-5-20251101_26-01-20-19-27.tar.gz","visualization_url":null},"gaia":{"score":69.1,"cost":0.55,"runtime":97.0,"download_url":"https://results.eval.all-hands.dev/eval-20805719842-claude-4-5_litellm_proxy-anthropic-claude-opus-4-5-20251101_26-01-08-06-24.tar.gz","visualization_url":null}},"categories":{"Issue Resolution":{"score":76.6,"cost":1.82,"runtime":325.0},"Frontend":{"score":41.2,"cost":2.54,"runtime":671.0},"Greenfield":{"score":37.5,"cost":9.11,"runtime":1264.0},"Testing":{"score":78.5,"cost":1.38,"runtime":268.0},"Information Gathering":{"score":69.1,"cost":0.55,"runtime":97.0}}},{"id":"OpenHands_v1.8.3_GPT-5.2","language_model":"GPT-5.2","sdk_version":"v1.8.3","openness":"closed","average_score":58.839999999999996,"average_cost":1.874,"average_runtime":832.6,"categories_completed":5,"release_date":"2025-12-11","benchmarks":{"swe-bench":{"score":74.6,"cost":0.86,"runtime":476.0,"download_url":"https://results.eval.all-hands.dev/eval-21010530639-gpt-5-2_litellm_proxy-openai-gpt-5-2-2025-12-11_26-01-15-04-24.tar.gz","visualization_url":null},"swe-bench-multimodal":{"score":30.9,"cost":2.77,"runtime":1571.0,"download_url":"https://results.eval.all-hands.dev/eval-21320837315-gpt-5-2_litellm_proxy-openai-gpt-5-2-2025-12-11_26-01-25-04-55.tar.gz","visualization_url":null},"commit0":{"score":50.0,"cost":4.7,"runtime":1580.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-openai-gpt-5-2-2025-12-11/25033142930/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/c343765e-1fdd-494f-96b7-de843c272a36"},"swt-bench":{"score":73.2,"cost":0.56,"runtime":347.0,"download_url":"https://results.eval.all-hands.dev/eval-21146218648-gpt-5-2_litellm_proxy-openai-gpt-5-2-2025-12-11_26-01-19-23-58.tar.gz","visualization_url":null},"gaia":{"score":65.5,"cost":0.48,"runtime":189.0,"download_url":"https://results.eval.all-hands.dev/eval-21041979432-gpt-5-2_litellm_proxy-openai-gpt-5-2-2025-12-11_26-01-15-20-40.tar.gz","visualization_url":null}},"categories":{"Issue Resolution":{"score":74.6,"cost":0.86,"runtime":476.0},"Frontend":{"score":30.9,"cost":2.77,"runtime":1571.0},"Greenfield":{"score":50.0,"cost":4.7,"runtime":1580.0},"Testing":{"score":73.2,"cost":0.56,"runtime":347.0},"Information Gathering":{"score":65.5,"cost":0.48,"runtime":189.0}}},{"id":"OpenHands_v1.8.3_GPT-5.2-Codex","language_model":"GPT-5.2-Codex","sdk_version":"v1.8.3","openness":"closed","average_score":58.279999999999994,"average_cost":2.124,"average_runtime":914.8,"categories_completed":5,"release_date":"2025-12-18","benchmarks":{"swe-bench":{"score":73.8,"cost":0.94,"runtime":438.0,"download_url":"https://results.eval.all-hands.dev/eval-21386738547-gpt-5-2-co_litellm_proxy-gpt-5-2-codex_26-01-27-12-57.tar.gz","visualization_url":null},"swe-bench-multimodal":{"score":35.9,"cost":2.97,"runtime":1434.0,"download_url":"https://results.eval.all-hands.dev/eval-21383028167-gpt-5-2-co_litellm_proxy-gpt-5-2-codex_26-01-27-09-13.tar.gz","visualization_url":null},"commit0":{"score":43.8,"cost":5.5,"runtime":1559.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-gpt-5-2-codex/24787401776/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/57c6395a-4cd4-4346-a0c0-4b1209275523"},"swt-bench":{"score":67.0,"cost":0.66,"runtime":344.0,"download_url":"https://results.eval.all-hands.dev/swtbench/litellm_proxy-gpt-5-2-codex/23816423289/results.tar.gz","visualization_url":null},"gaia":{"score":70.9,"cost":0.55,"runtime":799.0,"download_url":"https://results.eval.all-hands.dev/eval-21377864374-gpt-5-2-co_litellm_proxy-gpt-5-2-codex_26-01-27-04-20.tar.gz","visualization_url":null}},"categories":{"Issue Resolution":{"score":73.8,"cost":0.94,"runtime":438.0},"Frontend":{"score":35.9,"cost":2.97,"runtime":1434.0},"Greenfield":{"score":43.8,"cost":5.5,"runtime":1559.0},"Testing":{"score":67.0,"cost":0.66,"runtime":344.0},"Information Gathering":{"score":70.9,"cost":0.55,"runtime":799.0}}},{"id":"OpenHands_v1.11.5_GLM-5.1","language_model":"GLM-5.1","sdk_version":"v1.11.5","openness":"open","average_score":58.239999999999995,"average_cost":3.0,"average_runtime":1367.4,"categories_completed":5,"release_date":"2026-04-07","benchmarks":{"swe-bench":{"score":75.0,"cost":1.54,"runtime":748.0,"download_url":"https://results.eval.all-hands.dev/swebench/litellm_proxy-openrouter-z-ai-glm-5-1/24697829798/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/6543c602-c721-48e6-a0fe-a1abb08fab4b"},"swe-bench-multimodal":{"score":41.2,"cost":6.92,"runtime":3031.0,"download_url":"https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-openrouter-z-ai-glm-5-1/24730025059/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/26dfa179-9ab7-4b15-988d-b7ed2bf91328"},"commit0":{"score":37.5,"cost":5.31,"runtime":2498.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-openrouter-z-ai-glm-5-1/24902095932/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/069f1865-a4b0-4a67-9451-3043c2ab138c"},"swt-bench":{"score":70.2,"cost":0.76,"runtime":280.0,"download_url":"https://results.eval.all-hands.dev/swtbench/litellm_proxy-openrouter-z-ai-glm-5-1/24214829814/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/edae3592-9be7-4da5-83c3-b6ebe10e1bf6"},"gaia":{"score":67.3,"cost":0.47,"runtime":280.0,"download_url":"https://results.eval.all-hands.dev/gaia/litellm_proxy-openrouter-z-ai-glm-5-1/24376299760/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/0d35046a-cbee-473b-b316-d4f57ff7f9d2"}},"categories":{"Issue Resolution":{"score":75.0,"cost":1.54,"runtime":748.0},"Frontend":{"score":41.2,"cost":6.92,"runtime":3031.0},"Greenfield":{"score":37.5,"cost":5.31,"runtime":2498.0},"Testing":{"score":70.2,"cost":0.76,"runtime":280.0},"Information Gathering":{"score":67.3,"cost":0.47,"runtime":280.0}}},{"id":"OpenHands_v1.11.5_Gemini-3.1-Pro","language_model":"Gemini-3.1-Pro","sdk_version":"v1.11.5","openness":"closed","average_score":56.98,"average_cost":0.734,"average_runtime":858.8,"categories_completed":5,"release_date":"2026-01-15","benchmarks":{"swe-bench":{"score":75.4,"cost":0.63,"runtime":983.0,"download_url":"https://results.eval.all-hands.dev/swebench/litellm_proxy-gemini-3-1-pro-preview/22671304193/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/0e46d33a-d47c-4eff-a8af-70d9035ed9d3"},"swe-bench-multimodal":{"score":44.1,"cost":1.24,"runtime":1868.0,"download_url":"https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-gemini-3-1-pro-preview/22671316816/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/cca1e142-0f1b-45ef-95fc-fde8e4c9d3aa"},"commit0":{"score":25.0,"cost":1.18,"runtime":446.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-gemini-3-1-pro-preview/25033174380/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/3817b8ac-4a44-42f7-b6ce-08f747db0061"},"swt-bench":{"score":64.0,"cost":0.5,"runtime":283.0,"download_url":"https://results.eval.all-hands.dev/swtbench/litellm_proxy-gemini-3-1-pro-preview/22508371137/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/e06a484a-ae98-42ab-b27d-89a17f9e56f3"},"gaia":{"score":76.4,"cost":0.12,"runtime":714.0,"download_url":"https://results.eval.all-hands.dev/gaia/litellm_proxy-gemini-3-1-pro-preview/22676848520/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/3706e0fb-341c-4fa4-9d46-69f558a63a87"}},"categories":{"Issue Resolution":{"score":75.4,"cost":0.63,"runtime":983.0},"Frontend":{"score":44.1,"cost":1.24,"runtime":1868.0},"Greenfield":{"score":25.0,"cost":1.18,"runtime":446.0},"Testing":{"score":64.0,"cost":0.5,"runtime":283.0},"Information Gathering":{"score":76.4,"cost":0.12,"runtime":714.0}}},{"id":"OpenHands_v1.8.3_claude-sonnet-4-5","language_model":"claude-sonnet-4-5","sdk_version":"v1.8.3","openness":"closed","average_score":53.0,"average_cost":1.6320000000000001,"average_runtime":564.6,"categories_completed":5,"release_date":"2025-09-29","benchmarks":{"swe-bench":{"score":74.2,"cost":1.19,"runtime":534.0,"download_url":"https://results.eval.all-hands.dev/eval-21104232299-claude-son_litellm_proxy-claude-sonnet-4-5-20250929_26-01-18-07-25.tar.gz","visualization_url":null},"swe-bench-multimodal":{"score":36.8,"cost":1.89,"runtime":787.0,"download_url":"https://results.eval.all-hands.dev/eval-21318221216-claude-son_litellm_proxy-claude-sonnet-4-5-20250929_26-01-24-19-46.tar.gz","visualization_url":null},"commit0":{"score":12.5,"cost":3.23,"runtime":756.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-claude-sonnet-4-5-20250929/25079432235/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/17fc5d57-6aae-4dda-8f56-f6275df7db8e"},"swt-bench":{"score":68.8,"cost":0.98,"runtime":488.0,"download_url":"https://results.eval.all-hands.dev/eval-21146174206-claude-son_litellm_proxy-claude-sonnet-4-5-20250929_26-01-19-23-25.tar.gz","visualization_url":null},"gaia":{"score":72.7,"cost":0.87,"runtime":258.0,"download_url":"https://results.eval.all-hands.dev/gaia/litellm_proxy-claude-sonnet-4-5-20250929/23440285883/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/b39f4bcc-7db3-435d-b7d1-517c269c16f2"}},"categories":{"Issue Resolution":{"score":74.2,"cost":1.19,"runtime":534.0},"Frontend":{"score":36.8,"cost":1.89,"runtime":787.0},"Greenfield":{"score":12.5,"cost":3.23,"runtime":756.0},"Testing":{"score":68.8,"cost":0.98,"runtime":488.0},"Information Gathering":{"score":72.7,"cost":0.87,"runtime":258.0}}},{"id":"OpenHands_v1.11.5_Qwen3.6-Plus","language_model":"Qwen3.6-Plus","sdk_version":"v1.11.5","openness":"closed","average_score":52.85999999999999,"average_cost":2.7540000000000004,"average_runtime":699.6,"categories_completed":5,"release_date":"2026-04-01","benchmarks":{"swe-bench":{"score":74.2,"cost":1.5379,"runtime":664.0,"download_url":"https://results.eval.all-hands.dev/swebench/litellm_proxy-dashscope-qwen3-6-plus/23957434539/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/b9cb5405-1049-459f-8d4d-c256091a5549"},"swe-bench-multimodal":{"score":30.9,"cost":2.3158,"runtime":638.0,"download_url":"https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-dashscope-qwen3-6-plus/23949324373/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/7c984e26-6009-4208-819f-a9ba57d5476f"},"commit0":{"score":25.0,"cost":7.4936,"runtime":1037.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-dashscope-qwen3-6-plus/25079439356/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/c5f31dca-5471-4ce3-830a-938de77baece"},"swt-bench":{"score":62.1,"cost":2.085,"runtime":851.0,"download_url":"https://results.eval.all-hands.dev/swtbench/litellm_proxy-dashscope-qwen3-6-plus/24043642717/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/d5c3aa56-4cbd-4bac-9dfa-8d85cc25e994"},"gaia":{"score":72.1,"cost":0.3377,"runtime":308.0,"download_url":"https://results.eval.all-hands.dev/gaia/litellm_proxy-dashscope-qwen3-6-plus/24043637505/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/b26a4b3d-0bc2-41d8-b8bf-09453cf2fb53"}},"categories":{"Issue Resolution":{"score":74.2,"cost":1.5379,"runtime":664.0},"Frontend":{"score":30.9,"cost":2.3158,"runtime":638.0},"Greenfield":{"score":25.0,"cost":7.4936,"runtime":1037.0},"Testing":{"score":62.1,"cost":2.085,"runtime":851.0},"Information Gathering":{"score":72.1,"cost":0.3377,"runtime":308.0}}},{"id":"OpenHands_v1.11.5_GLM-5","language_model":"GLM-5","sdk_version":"v1.11.5","openness":"open","average_score":49.44,"average_cost":1.028,"average_runtime":1342.6,"categories_completed":5,"release_date":"2026-02-11","benchmarks":{"swe-bench":{"score":73.4,"cost":1.06,"runtime":1435.0,"download_url":"https://results.eval.all-hands.dev/swebench/litellm_proxy-openrouter-z-ai-glm-5/22118959539/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/58f92ff8-e886-40ab-a636-8337fc8c9bfe"},"swe-bench-multimodal":{"score":35.3,"cost":0.58,"runtime":1140.0,"download_url":"https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-openrouter-z-ai-glm-5/22881368139/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/756a9904-f7f3-4c23-a663-8a7dd853d6dd"},"commit0":{"score":31.2,"cost":2.23,"runtime":1725.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-openrouter-z-ai-glm-5/25008748071/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/76ea5306-b580-47c0-a9ad-67a1c995bfba"},"swt-bench":{"score":47.3,"cost":0.91,"runtime":1085.0,"download_url":"https://results.eval.all-hands.dev/swtbench/litellm_proxy-openrouter-z-ai-glm-5/22240291718/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/a7d29286-6810-4ca8-8a11-21d6ae33822b"},"gaia":{"score":60.0,"cost":0.36,"runtime":1328.0,"download_url":"https://results.eval.all-hands.dev/gaia/litellm_proxy-openrouter-z-ai-glm-5/22117141473/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/c37cd7c9-3839-402d-bdf6-a0c10c4c085b"}},"categories":{"Issue Resolution":{"score":73.4,"cost":1.06,"runtime":1435.0},"Frontend":{"score":35.3,"cost":0.58,"runtime":1140.0},"Greenfield":{"score":31.2,"cost":2.23,"runtime":1725.0},"Testing":{"score":47.3,"cost":0.91,"runtime":1085.0},"Information Gathering":{"score":60.0,"cost":0.36,"runtime":1328.0}}},{"id":"OpenHands_v1.8.3_Kimi-K2.5","language_model":"Kimi-K2.5","sdk_version":"v1.8.3","openness":"open","average_score":49.17999999999999,"average_cost":0.81806,"average_runtime":898.6,"categories_completed":5,"release_date":"2026-01-27","benchmarks":{"swe-bench":{"score":68.8,"cost":0.4063,"runtime":707.0,"download_url":"https://results.eval.all-hands.dev/swebench/litellm_proxy-moonshot-kimi-k2-5/21417485547/results.tar.gz","visualization_url":null},"swe-bench-multimodal":{"score":32.8,"cost":1.6213,"runtime":921.0,"download_url":"https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-moonshot-kimi-k2-5/21492411890/results.tar.gz","visualization_url":null},"commit0":{"score":18.8,"cost":1.26,"runtime":1878.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-moonshot-kimi-k2-5/25033201272/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/7f44e8ae-607d-46e7-8f5d-b43abacf8c1b"},"swt-bench":{"score":61.9,"cost":0.4246,"runtime":385.0,"download_url":"https://results.eval.all-hands.dev/swtbench/litellm_proxy-moonshot-kimi-k2-5/21535132257/results.tar.gz","visualization_url":null},"gaia":{"score":63.6,"cost":0.3781,"runtime":602.0,"download_url":"https://results.eval.all-hands.dev/gaia/litellm_proxy-moonshot-kimi-k2-5/21497407856/results.tar.gz","visualization_url":null}},"categories":{"Issue Resolution":{"score":68.8,"cost":0.4063,"runtime":707.0},"Frontend":{"score":32.8,"cost":1.6213,"runtime":921.0},"Greenfield":{"score":18.8,"cost":1.26,"runtime":1878.0},"Testing":{"score":61.9,"cost":0.4246,"runtime":385.0},"Information Gathering":{"score":63.6,"cost":0.3781,"runtime":602.0}}},{"id":"OpenHands_v1.8.3_Gemini-3-Pro","language_model":"Gemini-3-Pro","sdk_version":"v1.8.3","openness":"closed","average_score":49.04,"average_cost":1.42,"average_runtime":1090.6,"categories_completed":5,"release_date":"2025-11-18","benchmarks":{"swe-bench":{"score":70.6,"cost":0.95,"runtime":343.0,"download_url":"https://results.eval.all-hands.dev/eval-21011486666-gemini-3-p_litellm_proxy-gemini-gemini-3-pro-preview_26-01-15-07-43.tar.gz","visualization_url":null},"swe-bench-multimodal":{"score":36.8,"cost":1.46,"runtime":710.0,"download_url":"https://results.eval.all-hands.dev/eval-21320838747-gemini-3-p_litellm_proxy-gemini-gemini-3-pro-preview_26-01-24-22-39.tar.gz","visualization_url":null},"commit0":{"score":25.0,"cost":3.18,"runtime":2239.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-gemini-3-pro-preview/22100005487/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/bfba3513-7461-42da-9944-874848240c29"},"swt-bench":{"score":68.6,"cost":1.01,"runtime":386.0,"download_url":"https://results.eval.all-hands.dev/eval-21186630675-gemini-3-p_litellm_proxy-gemini-gemini-3-pro-preview_26-01-21-07-16.tar.gz","visualization_url":null},"gaia":{"score":44.2,"cost":0.5,"runtime":1775.0,"download_url":"https://results.eval.all-hands.dev/eval-21272815800-gemini-3-p_litellm_proxy-gemini-gemini-3-pro-preview_26-01-24-00-30.tar.gz","visualization_url":null}},"categories":{"Issue Resolution":{"score":70.6,"cost":0.95,"runtime":343.0},"Frontend":{"score":36.8,"cost":1.46,"runtime":710.0},"Greenfield":{"score":25.0,"cost":3.18,"runtime":2239.0},"Testing":{"score":68.6,"cost":1.01,"runtime":386.0},"Information Gathering":{"score":44.2,"cost":0.5,"runtime":1775.0}}},{"id":"OpenHands_v1.8.3_Gemini-3-Flash","language_model":"Gemini-3-Flash","sdk_version":"v1.8.3","openness":"closed","average_score":49.0,"average_cost":0.9039999999999999,"average_runtime":767.6,"categories_completed":5,"release_date":"2025-12-22","benchmarks":{"swe-bench":{"score":74.6,"cost":0.42,"runtime":343.0,"download_url":"https://results.eval.all-hands.dev/eval-21179606180-gemini-3-f_litellm_proxy-gemini-gemini-3-flash-preview_26-01-21-03-09.tar.gz","visualization_url":null},"swe-bench-multimodal":{"score":22.1,"cost":0.8,"runtime":1152.0,"download_url":"https://results.eval.all-hands.dev/eval-21324688868-gemini-3-f_litellm_proxy-gemini-gemini-3-flash-preview_26-01-25-09-36.tar.gz","visualization_url":null},"commit0":{"score":18.8,"cost":2.62,"runtime":1732.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-gemini-3-flash-preview/25033158735/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/3501752b-0081-4611-9807-335281599b80"},"swt-bench":{"score":70.7,"cost":0.3,"runtime":213.0,"download_url":"https://results.eval.all-hands.dev/eval-21179607467-gemini-3-f_litellm_proxy-gemini-gemini-3-flash-preview_26-01-21-01-44.tar.gz","visualization_url":null},"gaia":{"score":58.8,"cost":0.38,"runtime":398.0,"download_url":"https://results.eval.all-hands.dev/eval-20821586318-gemini-3-f_litellm_proxy-gemini-gemini-3-flash-preview_26-01-08-20-24.tar.gz","visualization_url":null}},"categories":{"Issue Resolution":{"score":74.6,"cost":0.42,"runtime":343.0},"Frontend":{"score":22.1,"cost":0.8,"runtime":1152.0},"Greenfield":{"score":18.8,"cost":2.62,"runtime":1732.0},"Testing":{"score":70.7,"cost":0.3,"runtime":213.0},"Information Gathering":{"score":58.8,"cost":0.38,"runtime":398.0}}},{"id":"OpenHands_v1.8.3_DeepSeek-V3.2-Reasoner","language_model":"DeepSeek-V3.2-Reasoner","sdk_version":"v1.8.3","openness":"open","average_score":45.67999999999999,"average_cost":0.22000000000000003,"average_runtime":1253.8,"categories_completed":5,"release_date":"2025-12-01","benchmarks":{"swe-bench":{"score":71.6,"cost":0.16,"runtime":1429.0,"download_url":"https://results.eval.all-hands.dev/eval-21386741317-deepseek-v_litellm_proxy-deepseek-deepseek-reasoner_26-01-27-17-23.tar.gz","visualization_url":null},"swe-bench-multimodal":{"score":27.9,"cost":0.19,"runtime":1515.0,"download_url":"https://results.eval.all-hands.dev/eval-21345780997-deepseek-v_litellm_proxy-deepseek-deepseek-reasoner_26-01-26-11-33.tar.gz","visualization_url":null},"commit0":{"score":25.0,"cost":0.57,"runtime":1683.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-deepseek-deepseek-reasoner/25033123976/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/df76145a-e319-468e-ae2e-07f71208496f"},"swt-bench":{"score":53.6,"cost":0.12,"runtime":1215.0,"download_url":"https://results.eval.all-hands.dev/eval-21233988879-deepseek-v_litellm_proxy-deepseek-deepseek-reasoner_26-01-24-02-34.tar.gz","visualization_url":null},"gaia":{"score":50.3,"cost":0.06,"runtime":427.0,"download_url":"https://results.eval.all-hands.dev/eval-21070491317-deepseek-v_litellm_proxy-deepseek-deepseek-reasoner_26-01-16-16-39.tar.gz","visualization_url":null}},"categories":{"Issue Resolution":{"score":71.6,"cost":0.16,"runtime":1429.0},"Frontend":{"score":27.9,"cost":0.19,"runtime":1515.0},"Greenfield":{"score":25.0,"cost":0.57,"runtime":1683.0},"Testing":{"score":53.6,"cost":0.12,"runtime":1215.0},"Information Gathering":{"score":50.3,"cost":0.06,"runtime":427.0}}},{"id":"OpenHands_v0.38.0_MiniMax-M2.5","language_model":"MiniMax-M2.5","sdk_version":"v0.38.0","openness":"open","average_score":45.22,"average_cost":0.33465999999999996,"average_runtime":780.2,"categories_completed":5,"release_date":"2026-02-11","benchmarks":{"swe-bench":{"score":72.6,"cost":0.3066,"runtime":455.0,"download_url":"https://results.eval.all-hands.dev/swebench/litellm_proxy-jade-spark-2862/21885618644/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/5f0703c7-f946-414b-b3ba-f4ce77aad1b5"},"swe-bench-multimodal":{"score":25.0,"cost":0.5941,"runtime":611.0,"download_url":"https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-jade-spark-2862/21900356665/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/194e3e71-5874-45a5-96b8-220155ea1e24"},"commit0":{"score":12.5,"cost":0.49,"runtime":1730.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-minimax-MiniMax-M2-5/24902079478/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/c06b32df-bf9e-4d10-9e81-93f2c01955a5"},"swt-bench":{"score":68.1,"cost":0.2239,"runtime":389.0,"download_url":"https://results.eval.all-hands.dev/swtbench/litellm_proxy-jade-spark-2862/21870831025/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/9bbf1e2a-6085-4a0a-b68f-357f417f4012"},"gaia":{"score":47.9,"cost":0.0587,"runtime":716.0,"download_url":"https://results.eval.all-hands.dev/gaia/litellm_proxy-jade-spark-2862/21896759788/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/ac28bd0e-7131-47b4-ba01-e64d1c49df45"}},"categories":{"Issue Resolution":{"score":72.6,"cost":0.3066,"runtime":455.0},"Frontend":{"score":25.0,"cost":0.5941,"runtime":611.0},"Greenfield":{"score":12.5,"cost":0.49,"runtime":1730.0},"Testing":{"score":68.1,"cost":0.2239,"runtime":389.0},"Information Gathering":{"score":47.9,"cost":0.0587,"runtime":716.0}}},{"id":"OpenHands_v1.11.5_claude-sonnet-4-6","language_model":"claude-sonnet-4-6","sdk_version":"v1.11.5","openness":"closed","average_score":44.52,"average_cost":2.206,"average_runtime":737.0,"categories_completed":5,"release_date":"2026-02-17","benchmarks":{"swe-bench":{"score":74.4,"cost":1.03,"runtime":421.0,"download_url":"https://results.eval.all-hands.dev/swebench/litellm_proxy-anthropic-claude-sonnet-4-6/22234586134/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/0e4ec892-21dc-424f-8802-b4db76c18123"},"swe-bench-multimodal":{"score":30.9,"cost":2.24,"runtime":931.0,"download_url":"https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-anthropic-claude-sonnet-4-6/22360554239/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/ed24d3b6-988c-4ef1-b709-b0f223173547"},"commit0":{"score":50.0,"cost":6.48,"runtime":1760.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-anthropic-claude-sonnet-4-6/24870930029/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/a0e8861b-136f-4ff3-be1a-dcf2daa2428a"},"swt-bench":{"score":54.0,"cost":0.87,"runtime":346.0,"download_url":"https://results.eval.all-hands.dev/swtbench/litellm_proxy-anthropic-claude-sonnet-4-6/22443433896/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/17b3fd34-7334-4c67-a510-cf2e094998c2"},"gaia":{"score":13.3,"cost":0.41,"runtime":227.0,"download_url":"https://results.eval.all-hands.dev/gaia/litellm_proxy-anthropic-claude-sonnet-4-6/22328833272/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/452a6943-1a58-4ef5-a2c7-ab2328fa4501"}},"categories":{"Issue Resolution":{"score":74.4,"cost":1.03,"runtime":421.0},"Frontend":{"score":30.9,"cost":2.24,"runtime":931.0},"Greenfield":{"score":50.0,"cost":6.48,"runtime":1760.0},"Testing":{"score":54.0,"cost":0.87,"runtime":346.0},"Information Gathering":{"score":13.3,"cost":0.41,"runtime":227.0}}},{"id":"OpenHands_v1.14.0_Minimax-2.7","language_model":"Minimax-2.7","sdk_version":"v1.14.0","openness":"open","average_score":43.379999999999995,"average_cost":0.36094,"average_runtime":852.4,"categories_completed":5,"release_date":"2026-03-18","benchmarks":{"swe-bench":{"score":75.6,"cost":0.1795,"runtime":529.0,"download_url":"https://results.eval.all-hands.dev/swebench/litellm_proxy-minimax-MiniMax-M2-7/23463806447/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/a248d81f-d8d0-4823-9853-0378eed4dcc4"},"swe-bench-multimodal":{"score":27.9,"cost":0.388,"runtime":1084.0,"download_url":"https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-minimax-MiniMax-M2-7/23369175877/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/161dd621-b70d-4089-afcf-ea2e3d13e874"},"commit0":{"score":18.8,"cost":0.8926,"runtime":1621.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-minimax-MiniMax-M2-7/25008721830/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/081a0b7f-fef2-4dad-a267-49f90b2fc866"},"swt-bench":{"score":69.1,"cost":0.1283,"runtime":352.0,"download_url":"https://results.eval.all-hands.dev/swtbench/litellm_proxy-minimax-MiniMax-M2-7/24039895569/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/f42deebe-f174-41f7-96e8-3e2fbc8a61d7"},"gaia":{"score":25.5,"cost":0.2163,"runtime":676.0,"download_url":"https://results.eval.all-hands.dev/gaia/litellm_proxy-minimax-MiniMax-M2-7/23855930236/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/6839ba33-64fc-4b43-8ec2-e0c18f2f94bf"}},"categories":{"Issue Resolution":{"score":75.6,"cost":0.1795,"runtime":529.0},"Frontend":{"score":27.9,"cost":0.388,"runtime":1084.0},"Greenfield":{"score":18.8,"cost":0.8926,"runtime":1621.0},"Testing":{"score":69.1,"cost":0.1283,"runtime":352.0},"Information Gathering":{"score":25.5,"cost":0.2163,"runtime":676.0}}},{"id":"OpenHands_v1.8.3_GLM-4.7","language_model":"GLM-4.7","sdk_version":"v1.8.3","openness":"open","average_score":42.260000000000005,"average_cost":0.67592,"average_runtime":1218.8,"categories_completed":5,"release_date":"2025-12-22","benchmarks":{"swe-bench":{"score":73.4,"cost":0.5596,"runtime":1007.0,"download_url":"https://results.eval.all-hands.dev/swebench/litellm_proxy-openrouter-z-ai-glm-4-7/21547894190/results.tar.gz","visualization_url":null},"swe-bench-multimodal":{"score":22.1,"cost":0.66,"runtime":1519.0,"download_url":"https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-openrouter-z-ai-glm-4-7/21674056150/results.tar.gz","visualization_url":null},"commit0":{"score":12.5,"cost":1.64,"runtime":1686.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-openrouter-z-ai-glm-4-7/25033135919/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/84ad9e18-dc52-4085-bc7f-54205df4fae4"},"swt-bench":{"score":49.4,"cost":0.37,"runtime":744.0,"download_url":"https://results.eval.all-hands.dev/swtbench/litellm_proxy-openrouter-z-ai-glm-4-7/21548136286/results.tar.gz","visualization_url":null},"gaia":{"score":53.9,"cost":0.15,"runtime":1138.0,"download_url":"https://results.eval.all-hands.dev/gaia/litellm_proxy-openrouter-z-ai-glm-4-7/21520797407/results.tar.gz","visualization_url":null}},"categories":{"Issue Resolution":{"score":73.4,"cost":0.5596,"runtime":1007.0},"Frontend":{"score":22.1,"cost":0.66,"runtime":1519.0},"Greenfield":{"score":12.5,"cost":1.64,"runtime":1686.0},"Testing":{"score":49.4,"cost":0.37,"runtime":744.0},"Information Gathering":{"score":53.9,"cost":0.15,"runtime":1138.0}}},{"id":"OpenHands_v1.8.3_MiniMax-M2.1","language_model":"MiniMax-M2.1","sdk_version":"v1.8.3","openness":"open","average_score":41.16,"average_cost":0.6574600000000002,"average_runtime":818.0,"categories_completed":5,"release_date":"2025-12-23","benchmarks":{"swe-bench":{"score":68.8,"cost":0.7118,"runtime":579.0,"download_url":"https://results.eval.all-hands.dev/eval-21213160613-minimax-m2_litellm_proxy-minimax-MiniMax-M2-1_26-01-21-21-13.tar.gz","visualization_url":null},"swe-bench-multimodal":{"score":16.2,"cost":1.4043,"runtime":1417.0,"download_url":"https://results.eval.all-hands.dev/eval-21324690733-minimax-m2_litellm_proxy-minimax-MiniMax-M2-1_26-01-25-06-38.tar.gz","visualization_url":null},"commit0":{"score":18.8,"cost":0.38,"runtime":980.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-minimax-MiniMax-M2-1/25079289346/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/6b9df675-af26-4bd6-89b5-d18b38782a92"},"swt-bench":{"score":61.4,"cost":0.5441,"runtime":473.0,"download_url":"https://results.eval.all-hands.dev/eval-21226206260-minimax-m2_litellm_proxy-minimax-MiniMax-M2-1_26-01-23-09-13.tar.gz","visualization_url":null},"gaia":{"score":40.6,"cost":0.2471,"runtime":641.0,"download_url":"https://results.eval.all-hands.dev/eval-21225856759-minimax-m2_litellm_proxy-minimax-MiniMax-M2-1_26-01-21-23-35.tar.gz","visualization_url":null}},"categories":{"Issue Resolution":{"score":68.8,"cost":0.7118,"runtime":579.0},"Frontend":{"score":16.2,"cost":1.4043,"runtime":1417.0},"Greenfield":{"score":18.8,"cost":0.38,"runtime":980.0},"Testing":{"score":61.4,"cost":0.5441,"runtime":473.0},"Information Gathering":{"score":40.6,"cost":0.2471,"runtime":641.0}}},{"id":"OpenHands_v1.8.3_Kimi-K2-Thinking","language_model":"Kimi-K2-Thinking","sdk_version":"v1.8.3","openness":"open","average_score":40.99999999999999,"average_cost":1.3780000000000001,"average_runtime":1267.6,"categories_completed":5,"release_date":"2025-11-06","benchmarks":{"swe-bench":{"score":69.2,"cost":2.0,"runtime":1325.0,"download_url":"https://results.eval.all-hands.dev/eval-21078698262-kimi-k2-th_litellm_proxy-moonshot-kimi-k2-thinking_26-01-17-16-26.tar.gz","visualization_url":null},"swe-bench-multimodal":{"score":32.4,"cost":2.31,"runtime":1641.0,"download_url":"https://results.eval.all-hands.dev/eval-21320841046-kimi-k2-th_litellm_proxy-moonshot-kimi-k2-thinking_26-01-25-05-30.tar.gz","visualization_url":null},"commit0":{"score":12.5,"cost":0.54,"runtime":1840.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-moonshot-kimi-k2-thinking/25033195046/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/411cb079-1f09-4779-96b9-4c658df2c0be"},"swt-bench":{"score":47.3,"cost":1.39,"runtime":1253.0,"download_url":"https://results.eval.all-hands.dev/eval-21173239168-kimi-k2-th_litellm_proxy-moonshot-kimi-k2-thinking_26-01-21-08-11.tar.gz","visualization_url":null},"gaia":{"score":43.6,"cost":0.65,"runtime":279.0,"download_url":"https://results.eval.all-hands.dev/eval-21093372341-kimi-k2-th_litellm_proxy-moonshot-kimi-k2-thinking_26-01-17-13-03.tar.gz","visualization_url":null}},"categories":{"Issue Resolution":{"score":69.2,"cost":2.0,"runtime":1325.0},"Frontend":{"score":32.4,"cost":2.31,"runtime":1641.0},"Greenfield":{"score":12.5,"cost":0.54,"runtime":1840.0},"Testing":{"score":47.3,"cost":1.39,"runtime":1253.0},"Information Gathering":{"score":43.6,"cost":0.65,"runtime":279.0}}},{"id":"OpenHands_v1.11.5_Qwen3.5-Flash","language_model":"Qwen3.5-Flash","sdk_version":"v1.11.5","openness":"open","average_score":38.08,"average_cost":2.2339,"average_runtime":2909.6,"categories_completed":5,"release_date":"2026-02-24","benchmarks":{"swe-bench":{"score":62.0,"cost":2.1639,"runtime":4132.0,"download_url":"https://results.eval.all-hands.dev/swebench/litellm_proxy-dashscope-qwen3-5-flash-2026-02-23/24193134639/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/828c5288-5e15-4660-9f22-c6bfe6e819e5"},"swe-bench-multimodal":{"score":27.9,"cost":2.6218,"runtime":2459.0,"download_url":"https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-dashscope-qwen3-5-flash-2026-02-23/24426485940/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/84c8dc14-d082-430d-b19c-e142de615385"},"commit0":{"score":12.5,"cost":4.3309,"runtime":4027.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-dashscope-qwen3-5-flash-2026-02-23/22443226509/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/2416b7ce-7841-4d7b-b8fb-83ad5c992826"},"swt-bench":{"score":38.3,"cost":1.7107,"runtime":3083.0,"download_url":"https://results.eval.all-hands.dev/swtbench/litellm_proxy-dashscope-qwen3-5-flash-2026-02-23/24086513973/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/4ef023af-364e-4ba6-ac3d-19336bd4af18"},"gaia":{"score":49.7,"cost":0.3422,"runtime":847.0,"download_url":"https://results.eval.all-hands.dev/gaia/litellm_proxy-dashscope-qwen3-5-flash-2026-02-23/24193141939/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/9a6fe5b9-dc75-41b4-a7e5-6c4817ae4c50"}},"categories":{"Issue Resolution":{"score":62.0,"cost":2.1639,"runtime":4132.0},"Frontend":{"score":27.9,"cost":2.6218,"runtime":2459.0},"Greenfield":{"score":12.5,"cost":4.3309,"runtime":4027.0},"Testing":{"score":38.3,"cost":1.7107,"runtime":3083.0},"Information Gathering":{"score":49.7,"cost":0.3422,"runtime":847.0}}},{"id":"OpenHands_v1.14.0_Nemotron-3-Super","language_model":"Nemotron-3-Super","sdk_version":"v1.14.0","openness":"open","average_score":36.160000000000004,"average_cost":0.58068,"average_runtime":1328.8,"categories_completed":5,"release_date":"2026-03-11","benchmarks":{"swe-bench":{"score":62.0,"cost":0.4663,"runtime":874.0,"download_url":"https://results.eval.all-hands.dev/swebench/litellm_proxy-converse-nemotron-super-3-120b/24135041475/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/054f0361-d26c-4466-82ae-042503b93f2e"},"swe-bench-multimodal":{"score":20.6,"cost":0.7679,"runtime":1311.0,"download_url":"https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-converse-nemotron-super-3-120b/24197975725/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/44cc701e-6ecf-4c70-9cca-4246cbd402f4"},"commit0":{"score":12.5,"cost":1.2153,"runtime":2905.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-converse-nemotron-super-3-120b/24197990049/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/ba725a00-2947-484f-9dad-951071d34c3d"},"swt-bench":{"score":45.7,"cost":0.3333,"runtime":1027.0,"download_url":"https://results.eval.all-hands.dev/swtbench/litellm_proxy-converse-nemotron-super-3-120b/24106920313/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/01e58390-f947-4dcc-8d34-5e8c61bd3788"},"gaia":{"score":40.0,"cost":0.1206,"runtime":527.0,"download_url":"https://results.eval.all-hands.dev/gaia/litellm_proxy-converse-nemotron-super-3-120b/24197982616/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/83b65b31-9799-4019-844c-815151e4798e"}},"categories":{"Issue Resolution":{"score":62.0,"cost":0.4663,"runtime":874.0},"Frontend":{"score":20.6,"cost":0.7679,"runtime":1311.0},"Greenfield":{"score":12.5,"cost":1.2153,"runtime":2905.0},"Testing":{"score":45.7,"cost":0.3333,"runtime":1027.0},"Information Gathering":{"score":40.0,"cost":0.1206,"runtime":527.0}}},{"id":"OpenHands_v1.11.0_Qwen3-Coder-Next","language_model":"Qwen3-Coder-Next","sdk_version":"v1.11.0","openness":"open","average_score":34.68,"average_cost":1.0125,"average_runtime":1068.0,"categories_completed":4,"release_date":"2026-02-02","benchmarks":{"swe-bench":{"score":66.6,"cost":1.36,"runtime":1445.0,"download_url":"https://results.eval.all-hands.dev/swebench/litellm_proxy-openrouter-qwen-qwen3-coder-next/21713643251/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/68c415d3-db32-418b-b0b4-2030edc2216e"},"swe-bench-multimodal":{"score":30.9,"cost":1.52,"runtime":1589.0,"download_url":"https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-openrouter-qwen-qwen3-coder-next/21759113321/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/8964f6e6-fcbd-4f26-a1ce-9fc657b8da43"},"commit0":{"score":25.0,"cost":1.04,"runtime":751.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-openrouter-qwen-qwen3-coder-next/21713658799/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/0a1fbe3f-6704-42ad-8b8a-d598dbf493d8"},"swt-bench":{"score":null,"cost":null,"runtime":null,"download_url":null,"visualization_url":null},"gaia":{"score":50.9,"cost":0.13,"runtime":487.0,"download_url":"https://results.eval.all-hands.dev/gaia/litellm_proxy-openrouter-qwen-qwen3-coder-next/21862734213/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/04627eda-dae6-42d7-9533-669b5187640d"}},"categories":{"Issue Resolution":{"score":66.6,"cost":1.36,"runtime":1445.0},"Frontend":{"score":30.9,"cost":1.52,"runtime":1589.0},"Greenfield":{"score":25.0,"cost":1.04,"runtime":751.0},"Testing":{"score":null,"cost":null,"runtime":null},"Information Gathering":{"score":50.9,"cost":0.13,"runtime":487.0}}},{"id":"OpenHands_v1.8.3_Qwen3-Coder-480B","language_model":"Qwen3-Coder-480B","sdk_version":"v1.8.3","openness":"open","average_score":30.940000000000005,"average_cost":0.9219999999999999,"average_runtime":502.0,"categories_completed":5,"release_date":"2025-07-23","benchmarks":{"swe-bench":{"score":62.4,"cost":1.26,"runtime":680.0,"download_url":"https://results.eval.all-hands.dev/eval-20979851181-qwen-3-cod_litellm_proxy-fireworks_ai-qwen3-coder-480b-a35b-instruct_26-01-14-09-02.tar.gz","visualization_url":null},"swe-bench-multimodal":{"score":23.5,"cost":2.09,"runtime":1006.0,"download_url":"https://results.eval.all-hands.dev/eval-21357041327-qwen-3-cod_litellm_proxy-fireworks_ai-qwen3-coder-480b-a35b-instruct_26-01-26-15-08.tar.gz","visualization_url":null},"commit0":{"score":0.0,"cost":0.01,"runtime":1.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-fireworks_ai-qwen3-coder-480b-a35b-instruct/22104205466/results.tar.gz","visualization_url":null},"swt-bench":{"score":34.9,"cost":0.97,"runtime":626.0,"download_url":"https://results.eval.all-hands.dev/eval-21179579508-qwen-3-cod_litellm_proxy-fireworks_ai-qwen3-coder-480b-a35b-instruct_26-01-21-01-51.tar.gz","visualization_url":null},"gaia":{"score":33.9,"cost":0.28,"runtime":197.0,"download_url":"https://results.eval.all-hands.dev/eval-21093373009-qwen-3-cod_litellm_proxy-fireworks_ai-qwen3-coder-480b-a35b-instruct_26-01-17-12-54.tar.gz","visualization_url":null}},"categories":{"Issue Resolution":{"score":62.4,"cost":1.26,"runtime":680.0},"Frontend":{"score":23.5,"cost":2.09,"runtime":1006.0},"Greenfield":{"score":0.0,"cost":0.01,"runtime":1.0},"Testing":{"score":34.9,"cost":0.97,"runtime":626.0},"Information Gathering":{"score":33.9,"cost":0.28,"runtime":197.0}}},{"id":"OpenHands_v1.11.5_Kimi-K2.6","language_model":"Kimi-K2.6","sdk_version":"v1.11.5","openness":"open","average_score":29.0,"average_cost":0.5,"average_runtime":897.0,"categories_completed":2,"release_date":"2026-04-20","benchmarks":{"swe-bench":{"score":74.6,"cost":0.67,"runtime":1077.0,"download_url":"https://results.eval.all-hands.dev/swebench/litellm_proxy-moonshot-kimi-k2-6/25007210109/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/e024c27b-0bc2-437c-8b36-f3695942dcef"},"swe-bench-multimodal":{"score":null,"cost":null,"runtime":null,"download_url":null,"visualization_url":null},"commit0":{"score":null,"cost":null,"runtime":null,"download_url":null,"visualization_url":null},"swt-bench":{"score":70.4,"cost":0.33,"runtime":717.0,"download_url":"https://results.eval.all-hands.dev/swtbench/litellm_proxy-moonshot-kimi-k2-6/24901879531/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/9a4d9991-37b4-44a1-acff-2057c6525748"},"gaia":{"score":null,"cost":null,"runtime":null,"download_url":null,"visualization_url":null}},"categories":{"Issue Resolution":{"score":74.6,"cost":0.67,"runtime":1077.0},"Frontend":{"score":null,"cost":null,"runtime":null},"Greenfield":{"score":null,"cost":null,"runtime":null},"Testing":{"score":70.4,"cost":0.33,"runtime":717.0},"Information Gathering":{"score":null,"cost":null,"runtime":null}}},{"id":"OpenHands_v1.16.1_Trinity-Large-Thinking","language_model":"Trinity-Large-Thinking","sdk_version":"v1.16.1","openness":"open","average_score":25.4,"average_cost":0.745,"average_runtime":1055.0,"categories_completed":4,"release_date":"2026-04-01","benchmarks":{"swe-bench":{"score":56.8,"cost":0.69,"runtime":1204.0,"download_url":"https://results.eval.all-hands.dev/swebench/litellm_proxy-trinity-large-thinking/24168488722/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/8f5a30c8-5d86-4615-bc69-6d917e247944"},"swe-bench-multimodal":{"score":25.0,"cost":0.74,"runtime":930.0,"download_url":"https://results.eval.all-hands.dev/swebenchmultimodal/litellm_proxy-trinity-large-thinking/24216306617/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/f327bc7a-c85b-4a89-b0c9-2da1532bcec4"},"commit0":{"score":12.5,"cost":1.2,"runtime":1611.0,"download_url":"https://results.eval.all-hands.dev/commit0/litellm_proxy-trinity-large-thinking/24368419290/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/8d97d9a6-f993-44f6-aacd-a6977c811bd3"},"swt-bench":{"score":null,"cost":null,"runtime":null,"download_url":null,"visualization_url":null},"gaia":{"score":32.7,"cost":0.35,"runtime":475.0,"download_url":"https://results.eval.all-hands.dev/gaia/litellm_proxy-trinity-large-thinking/24216307983/results.tar.gz","visualization_url":"https://laminar.sh/shared/evals/74af64d2-66d2-4cb1-8d23-f981fafc88c6"}},"categories":{"Issue Resolution":{"score":56.8,"cost":0.69,"runtime":1204.0},"Frontend":{"score":25.0,"cost":0.74,"runtime":930.0},"Greenfield":{"score":12.5,"cost":1.2,"runtime":1611.0},"Testing":{"score":null,"cost":null,"runtime":null},"Information Gathering":{"score":32.7,"cost":0.35,"runtime":475.0}}}],"total_count":28,"categories":["Issue Resolution","Frontend","Greenfield","Testing","Information Gathering"],"benchmarks":["swe-bench","swe-bench-multimodal","commit0","swt-bench","gaia"],"fetched_at":"2026-05-11T15:36:50.739085"}