Drop results JSON here

or use the buttons above

{{ activeRun.agentName }}

{{ formattedDate }} · {{ activeRun.tasks.length }} {{ activeRun.tasks.length === 1 ? 'task' : 'tasks' }} · run {{ activeRun.id }}

{{ activeRun.totalScore }} / {{ activeRun.maxScore }}
{{ Math.round(activeRun.passRate * 100) }}% pass
{{ cat.label }}
{{ catData(cat.key).score }}
of {{ catData(cat.key).maxScore }} points
{{ catData(cat.key).passed }}/{{ catData(cat.key).total }} tasks passed

Tasks

{{ task.taskName }}
{{ task.taskId }}
{{ shortCat(task.category) }} {{ task.difficulty }}
{{ task.score }}/{{ task.maxScore }}
{{ formatMs(task.durationMs) }}
{{ v.label }}
{{ v.passed ? '+' + v.points : '0' }}/{{ v.points }}
{{ v.detail }}
LLM Judge {{ task.judge.score }}/{{ task.judge.maxScore }}
{{ task.judge.reasoning }}
{{ c.name }}
{{ c.score }}/{{ c.max }}
{{ c.note }}

Leaderboard

# Model Score {{ task.shortLabel }}
{{ i + 1 }} {{ row.agentName }} {{ row.totalScore }} {{ Math.round(row.passRate * 100) }}%