{"schema_version":"onlylabs.public_signal.v1","title":"Amazon (Nova) Repo: amazon-science/StaminaBench","description":"Amazon (Nova) repo signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/7d2f49b8-2782-40fc-af6b-64e26f84a421","json_url":"https://onlylabs.fyi/signals/7d2f49b8-2782-40fc-af6b-64e26f84a421/signal.json","generated_at":"2026-06-27T00:34:14.884Z","evidence_latest_fetched_at":"2026-06-20T07:04:51.295919+00:00","signal_first_seen_at":"2026-06-20T07:00:24.771019+00:00","org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/amazon","dossier_json_url":"https://onlylabs.fyi/labs/amazon/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/7d2f49b8-2782-40fc-af6b-64e26f84a421","signal_json":"https://onlylabs.fyi/signals/7d2f49b8-2782-40fc-af6b-64e26f84a421/signal.json","source":"https://github.com/amazon-science/StaminaBench","lab_dossier":"https://onlylabs.fyi/labs/amazon","lab_dossier_json":"https://onlylabs.fyi/labs/amazon/dossier.json","analysis":"https://onlylabs.fyi/analysis/amazon","analysis_json":"https://onlylabs.fyi/analysis/amazon/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/amazon/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":null,"topic_signals_json":null,"topic_feed":null,"data_business":null},"answer_pack":{"answer":"Amazon (Nova) published amazon-science/StaminaBench (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo amazon-science/StaminaBench · language Python · Benchmark for testing long-context language model performance.. onlylabs links this event to 1 captured evidence page and 6 related repo signals.","signal_desk":"repos","source_context":{"source_url":"https://github.com/amazon-science/StaminaBench","source_host":"github.com","occurred_at":"2026-06-15T20:04:53+00:00","first_seen_at":"2026-06-20T07:00:24.771019+00:00","date_source":"source","context":"Python"},"context_markers":[{"label":"Lab","value":"Amazon (Nova)","source":"signal"},{"label":"Signal desk","value":"repos","source":"signal"},{"label":"Source host","value":"github.com","source":"source"},{"label":"Repository","value":"amazon-science/StaminaBench","source":"source"},{"label":"Language","value":"Python","source":"source"},{"label":"Context","value":"Benchmark for testing long-context language model performance.","source":"signal"},{"label":"Notability","value":"New benchmark repo from Amazon Research.","source":"signal"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Agents and tool use","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/amazon-science/StaminaBench"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-20T07:04:51.295919+00:00"},"data_business":{"matches":false,"lanes":[],"matched_terms":[],"score":null,"reason":null},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/7d2f49b8-2782-40fc-af6b-64e26f84a421/signal.json","dossier_json":"https://onlylabs.fyi/labs/amazon/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/amazon/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/amazon/evidence.json","topic_signals_json":null,"topic_feed":null,"category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":null,"opportunities_json":null},"analysis_playbook":{"objective":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","evidence_focus":["repo name","owner","description","language","stars","source URL","first seen time","data, eval, infra, safety, and product terms"],"extraction_questions":["What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Is the repo new evidence for a lab direction that is not yet in writing or releases?","Which related signals should an analyst inspect next?"],"signal_questions":["What does this new repository reveal before a formal announcement exists?","What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Do the 6 related repo signals show a repeated pattern?"],"output_fields":["org","repo","technical_theme","data_business_lane","evidence_url"],"data_business_relevance":"New repositories can expose organization build priorities early, especially around internal tooling, eval infrastructure, data systems, deployment, and agent workflows.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/7d2f49b8-2782-40fc-af6b-64e26f84a421/signal.json","required":true},{"label":"source","url":"https://github.com/amazon-science/StaminaBench","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/amazon/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/amazon/evidence.json","required":true},{"label":"topic_signals_json","url":null,"required":false},{"label":"data_radar_json","url":null,"required":false}],"expected_output":["one-paragraph source-grounded interpretation","category-specific implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze Amazon (Nova)'s repo signal \"amazon-science/StaminaBench\" for frontier lab strategy."},"semantic_triples":[{"subject":"Amazon (Nova)","predicate":"published repo","object":"amazon-science/StaminaBench","text":"Amazon (Nova) published repo amazon-science/StaminaBench."},{"subject":"amazon-science/StaminaBench","predicate":"is classified as","object":"repo signal","text":"amazon-science/StaminaBench is classified as repo signal."},{"subject":"amazon-science/StaminaBench","predicate":"belongs to","object":"repos desk","text":"amazon-science/StaminaBench belongs to repos desk."},{"subject":"amazon-science/StaminaBench","predicate":"has context","object":"Python","text":"amazon-science/StaminaBench has context Python."},{"subject":"amazon-science/StaminaBench","predicate":"has evidence coverage","object":"1 captured evidence page","text":"amazon-science/StaminaBench has evidence coverage 1 captured evidence page."},{"subject":"amazon-science/StaminaBench","predicate":"has captured page count","object":"1","text":"amazon-science/StaminaBench has captured page count 1."},{"subject":"amazon-science/StaminaBench","predicate":"has readable page count","object":"1","text":"amazon-science/StaminaBench has readable page count 1."},{"subject":"amazon-science/StaminaBench","predicate":"has related signal count","object":"6","text":"amazon-science/StaminaBench has related signal count 6."},{"subject":"amazon-science/StaminaBench","predicate":"has analysis playbook objective","object":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","text":"amazon-science/StaminaBench has analysis playbook objective Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.."},{"subject":"amazon-science/StaminaBench","predicate":"has source host","object":"github.com","text":"amazon-science/StaminaBench has source host github.com."},{"subject":"amazon-science/StaminaBench","predicate":"has lab","object":"Amazon (Nova)","text":"amazon-science/StaminaBench has lab Amazon (Nova)."},{"subject":"amazon-science/StaminaBench","predicate":"has signal desk","object":"repos","text":"amazon-science/StaminaBench has signal desk repos."},{"subject":"amazon-science/StaminaBench","predicate":"has source host","object":"github.com","text":"amazon-science/StaminaBench has source host github.com."},{"subject":"amazon-science/StaminaBench","predicate":"has repository","object":"amazon-science/StaminaBench","text":"amazon-science/StaminaBench has repository amazon-science/StaminaBench."},{"subject":"amazon-science/StaminaBench","predicate":"has language","object":"Python","text":"amazon-science/StaminaBench has language Python."},{"subject":"amazon-science/StaminaBench","predicate":"has context","object":"Benchmark for testing long-context language model performance.","text":"amazon-science/StaminaBench has context Benchmark for testing long-context language model performance.."},{"subject":"amazon-science/StaminaBench","predicate":"has notability","object":"New benchmark repo from Amazon Research.","text":"amazon-science/StaminaBench has notability New benchmark repo from Amazon Research.."},{"subject":"amazon-science/StaminaBench","predicate":"has watch term","object":"Eval methodology","text":"amazon-science/StaminaBench has watch term Eval methodology."}]},"intelligence":{"signal_desk":"repos","answer":"Amazon (Nova) published amazon-science/StaminaBench (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo amazon-science/StaminaBench · language Python · Benchmark for testing long-context language model performance.. onlylabs links this event to 1 captured evidence page and 6 related repo signals.","semantic_triples":[{"subject":"Amazon (Nova)","predicate":"published repo","object":"amazon-science/StaminaBench","text":"Amazon (Nova) published repo amazon-science/StaminaBench."},{"subject":"amazon-science/StaminaBench","predicate":"is classified as","object":"repo signal","text":"amazon-science/StaminaBench is classified as repo signal."},{"subject":"amazon-science/StaminaBench","predicate":"belongs to","object":"repos desk","text":"amazon-science/StaminaBench belongs to repos desk."},{"subject":"amazon-science/StaminaBench","predicate":"has context","object":"Python","text":"amazon-science/StaminaBench has context Python."},{"subject":"amazon-science/StaminaBench","predicate":"has evidence coverage","object":"1 captured evidence page","text":"amazon-science/StaminaBench has evidence coverage 1 captured evidence page."}]},"signal":{"id":"7d2f49b8-2782-40fc-af6b-64e26f84a421","url":"https://onlylabs.fyi/signals/7d2f49b8-2782-40fc-af6b-64e26f84a421","json_url":"https://onlylabs.fyi/signals/7d2f49b8-2782-40fc-af6b-64e26f84a421/signal.json","source_url":"https://github.com/amazon-science/StaminaBench","title":"amazon-science/StaminaBench","summary":"Amazon (Nova) published a new repository. onlylabs watches repos for tooling, eval, infra, and model-adjacent work.","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"occurred_at":"2026-06-15T20:04:53+00:00","first_seen_at":"2026-06-20T07:00:24.771019+00:00","date_source":"source","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/amazon-science/StaminaBench"]},"facets":{"repo":"amazon-science/StaminaBench","language":"Python"},"traction":{"github_stars":0,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":null},"primary_evidence_page":{"is_primary":true,"source_match":true,"url":"https://github.com/amazon-science/StaminaBench","final_url":"https://github.com/amazon-science/StaminaBench","title":"amazon-science/StaminaBench repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-20T07:04:51.295919+00:00","bytes":30819,"raw_path":"8019facfcb048c833ee26118557226fa3164e5cb7dba721474c7b754aec331af.json","content_hash":"3d2a415648ab2dcf4fc52fb7608d4ab05210fc4483d3d6a7ae8fe9222e730bc2","excerpt_chars":1200,"truncated":true,"excerpt":"amazon-science/StaminaBench Language: Python License: NOASSERTION Stars: 0 Forks: 0 Open issues: 5 Created: 2026-06-15T20:04:53Z Pushed: 2026-06-20T00:24:05Z Default branch: main Fork: no Archived: no README: StaminaBench: Stress-Testing Coding Agents over 100 Interaction Turns [paper]() StaminaBench is a framework for evaluating LLM-powered coding agents on iterative software engineering tasks. The primary benchmark, **Iterative REST Server Generation**, asks an agent to implement a REST API from a natural-language specification, runs a test suite against the agent's server, feeds back failures, and iterates. After each turn the schema evolves (new entities, renamed fields, new guard conditions, etc.) and the agent must keep the server consistent with the updated spec. How it fits together There are three moving parts: 1. **Scenario data** — a deterministic schema (entities, fields, actions, analytics) plus, per turn, a natural-language spec, a pytest suite, and a ground-truth Flask server. Generated either programmatically (offline, no LLM) or via LLM (richer, requires Bedrock). 2. **Agent harness** — a thin Python wrapper around a CLI (Mini-SWE, OpenHands, OpenCode, …) that..."},"evidence_pages":[],"related_signals":[{"id":"bdb54b05-1c53-4df5-95a8-9b40f21c2c4f","url":"https://onlylabs.fyi/signals/bdb54b05-1c53-4df5-95a8-9b40f21c2c4f","source_url":"https://github.com/amazon-science/SenTSR-Bench","title":"amazon-science/SenTSR-Bench","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"occurred_at":"2026-06-17T06:16:27+00:00","first_seen_at":"2026-06-17T07:01:21.192218+00:00","date_source":"source"},{"id":"8dd3596d-bbed-4f82-bcb1-189231607b92","url":"https://onlylabs.fyi/signals/8dd3596d-bbed-4f82-bcb1-189231607b92","source_url":"https://github.com/amazon-science/foundcause","title":"amazon-science/foundcause","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"occurred_at":"2026-06-16T02:49:26+00:00","first_seen_at":"2026-06-17T07:01:21.192218+00:00","date_source":"source"},{"id":"087c32a2-6ad0-4981-9315-11fdd32a0153","url":"https://onlylabs.fyi/signals/087c32a2-6ad0-4981-9315-11fdd32a0153","source_url":"https://github.com/amazon-science/reskill","title":"amazon-science/reskill","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"occurred_at":"2026-06-04T02:13:35+00:00","first_seen_at":"2026-06-05T20:58:37.464059+00:00","date_source":"source"},{"id":"e5701aed-6cd3-48dd-bfa6-ef839031e2e8","url":"https://onlylabs.fyi/signals/e5701aed-6cd3-48dd-bfa6-ef839031e2e8","source_url":"https://github.com/amazon-science/dualkv-flash-attn-for-rl","title":"amazon-science/dualkv-flash-attn-for-rl","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"occurred_at":"2026-05-27T17:38:58+00:00","first_seen_at":"2026-06-05T20:58:37.464059+00:00","date_source":"source"},{"id":"8af28f0c-7331-4b08-b517-e18b3555e503","url":"https://onlylabs.fyi/signals/8af28f0c-7331-4b08-b517-e18b3555e503","source_url":"https://github.com/amazon-science/EvoMAS","title":"amazon-science/EvoMAS","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"occurred_at":"2026-05-19T19:23:29+00:00","first_seen_at":"2026-06-05T20:58:37.464059+00:00","date_source":"source"},{"id":"e3ff8718-7daa-4ebd-a3e6-3d825c538b74","url":"https://onlylabs.fyi/signals/e3ff8718-7daa-4ebd-a3e6-3d825c538b74","source_url":"https://github.com/amazon-science/adaptive-layerwise-perturbation","title":"amazon-science/adaptive-layerwise-perturbation","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"occurred_at":"2026-05-14T17:44:17+00:00","first_seen_at":"2026-06-05T20:58:37.464059+00:00","date_source":"source"}]}