{"schema_version":"onlylabs.public_signal.v1","title":"Amazon (Nova) Repo: amazon-science/SOP-Bench","description":"Amazon (Nova) repo signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/de8614c6-7a01-4856-b1df-e5eff189b585","json_url":"https://onlylabs.fyi/signals/de8614c6-7a01-4856-b1df-e5eff189b585/signal.json","generated_at":"2026-06-11T19:49:30.095Z","evidence_latest_fetched_at":"2026-06-11T02:53:09.34901+00:00","signal_first_seen_at":"2026-06-05T20:58:37.464059+00:00","org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/amazon","dossier_json_url":"https://onlylabs.fyi/labs/amazon/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/de8614c6-7a01-4856-b1df-e5eff189b585","signal_json":"https://onlylabs.fyi/signals/de8614c6-7a01-4856-b1df-e5eff189b585/signal.json","source":"https://github.com/amazon-science/SOP-Bench","lab_dossier":"https://onlylabs.fyi/labs/amazon","lab_dossier_json":"https://onlylabs.fyi/labs/amazon/dossier.json","analysis":"https://onlylabs.fyi/analysis/amazon","analysis_json":"https://onlylabs.fyi/analysis/amazon/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/amazon/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":null,"topic_signals_json":null,"topic_feed":null,"data_business":null},"answer_pack":{"answer":"Amazon (Nova) published amazon-science/SOP-Bench (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo amazon-science/SOP-Bench · language Python · New benchmark from Amazon, low stars. onlylabs links this event to 1 captured evidence page and 6 related repo signals.","signal_desk":"repos","source_context":{"source_url":"https://github.com/amazon-science/SOP-Bench","source_host":"github.com","occurred_at":"2026-02-01T21:42:38+00:00","first_seen_at":"2026-06-05T20:58:37.464059+00:00","date_source":"source","context":"Python"},"context_markers":[{"label":"Lab","value":"Amazon (Nova)","source":"signal"},{"label":"Signal desk","value":"repos","source":"signal"},{"label":"Source host","value":"github.com","source":"source"},{"label":"Repository","value":"amazon-science/SOP-Bench","source":"source"},{"label":"Language","value":"Python","source":"source"},{"label":"Stars","value":"21","source":"traction"},{"label":"Notability","value":"New benchmark from Amazon, low stars","source":"signal"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Safety and alignment","source":"evidence"},{"label":"Watch term","value":"Agents and tool use","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/amazon-science/SOP-Bench"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-11T02:53:09.34901+00:00"},"data_business":{"matches":false,"lanes":[],"matched_terms":[],"score":null,"reason":null},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/de8614c6-7a01-4856-b1df-e5eff189b585/signal.json","dossier_json":"https://onlylabs.fyi/labs/amazon/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/amazon/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/amazon/evidence.json","topic_signals_json":null,"topic_feed":null,"category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":null,"opportunities_json":null},"analysis_playbook":{"objective":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","evidence_focus":["repo name","owner","description","language","stars","source URL","first seen time","data, eval, infra, safety, and product terms"],"extraction_questions":["What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Is the repo new evidence for a lab direction that is not yet in writing or releases?","Which related signals should an analyst inspect next?"],"signal_questions":["What does this new repository reveal before a formal announcement exists?","What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Do the 6 related repo signals show a repeated pattern?"],"output_fields":["org","repo","technical_theme","data_business_lane","evidence_url"],"data_business_relevance":"New repositories can expose organization build priorities early, especially around internal tooling, eval infrastructure, data systems, deployment, and agent workflows.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/de8614c6-7a01-4856-b1df-e5eff189b585/signal.json","required":true},{"label":"source","url":"https://github.com/amazon-science/SOP-Bench","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/amazon/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/amazon/evidence.json","required":true},{"label":"topic_signals_json","url":null,"required":false},{"label":"data_radar_json","url":null,"required":false}],"expected_output":["one-paragraph source-grounded interpretation","category-specific implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze Amazon (Nova)'s repo signal \"amazon-science/SOP-Bench\" for frontier lab strategy."},"semantic_triples":[{"subject":"Amazon (Nova)","predicate":"published repo","object":"amazon-science/SOP-Bench","text":"Amazon (Nova) published repo amazon-science/SOP-Bench."},{"subject":"amazon-science/SOP-Bench","predicate":"is classified as","object":"repo signal","text":"amazon-science/SOP-Bench is classified as repo signal."},{"subject":"amazon-science/SOP-Bench","predicate":"belongs to","object":"repos desk","text":"amazon-science/SOP-Bench belongs to repos desk."},{"subject":"amazon-science/SOP-Bench","predicate":"has context","object":"Python","text":"amazon-science/SOP-Bench has context Python."},{"subject":"amazon-science/SOP-Bench","predicate":"has evidence coverage","object":"1 captured evidence page","text":"amazon-science/SOP-Bench has evidence coverage 1 captured evidence page."},{"subject":"amazon-science/SOP-Bench","predicate":"has captured page count","object":"1","text":"amazon-science/SOP-Bench has captured page count 1."},{"subject":"amazon-science/SOP-Bench","predicate":"has readable page count","object":"1","text":"amazon-science/SOP-Bench has readable page count 1."},{"subject":"amazon-science/SOP-Bench","predicate":"has related signal count","object":"6","text":"amazon-science/SOP-Bench has related signal count 6."},{"subject":"amazon-science/SOP-Bench","predicate":"has analysis playbook objective","object":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","text":"amazon-science/SOP-Bench has analysis playbook objective Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.."},{"subject":"amazon-science/SOP-Bench","predicate":"has source host","object":"github.com","text":"amazon-science/SOP-Bench has source host github.com."},{"subject":"amazon-science/SOP-Bench","predicate":"has lab","object":"Amazon (Nova)","text":"amazon-science/SOP-Bench has lab Amazon (Nova)."},{"subject":"amazon-science/SOP-Bench","predicate":"has signal desk","object":"repos","text":"amazon-science/SOP-Bench has signal desk repos."},{"subject":"amazon-science/SOP-Bench","predicate":"has source host","object":"github.com","text":"amazon-science/SOP-Bench has source host github.com."},{"subject":"amazon-science/SOP-Bench","predicate":"has repository","object":"amazon-science/SOP-Bench","text":"amazon-science/SOP-Bench has repository amazon-science/SOP-Bench."},{"subject":"amazon-science/SOP-Bench","predicate":"has language","object":"Python","text":"amazon-science/SOP-Bench has language Python."},{"subject":"amazon-science/SOP-Bench","predicate":"has stars","object":"21","text":"amazon-science/SOP-Bench has stars 21."},{"subject":"amazon-science/SOP-Bench","predicate":"has notability","object":"New benchmark from Amazon, low stars","text":"amazon-science/SOP-Bench has notability New benchmark from Amazon, low stars."},{"subject":"amazon-science/SOP-Bench","predicate":"has watch term","object":"Eval methodology","text":"amazon-science/SOP-Bench has watch term Eval methodology."}]},"intelligence":{"signal_desk":"repos","answer":"Amazon (Nova) published amazon-science/SOP-Bench (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo amazon-science/SOP-Bench · language Python · New benchmark from Amazon, low stars. onlylabs links this event to 1 captured evidence page and 6 related repo signals.","semantic_triples":[{"subject":"Amazon (Nova)","predicate":"published repo","object":"amazon-science/SOP-Bench","text":"Amazon (Nova) published repo amazon-science/SOP-Bench."},{"subject":"amazon-science/SOP-Bench","predicate":"is classified as","object":"repo signal","text":"amazon-science/SOP-Bench is classified as repo signal."},{"subject":"amazon-science/SOP-Bench","predicate":"belongs to","object":"repos desk","text":"amazon-science/SOP-Bench belongs to repos desk."},{"subject":"amazon-science/SOP-Bench","predicate":"has context","object":"Python","text":"amazon-science/SOP-Bench has context Python."},{"subject":"amazon-science/SOP-Bench","predicate":"has evidence coverage","object":"1 captured evidence page","text":"amazon-science/SOP-Bench has evidence coverage 1 captured evidence page."}]},"signal":{"id":"de8614c6-7a01-4856-b1df-e5eff189b585","url":"https://onlylabs.fyi/signals/de8614c6-7a01-4856-b1df-e5eff189b585","json_url":"https://onlylabs.fyi/signals/de8614c6-7a01-4856-b1df-e5eff189b585/signal.json","source_url":"https://github.com/amazon-science/SOP-Bench","title":"amazon-science/SOP-Bench","summary":"Amazon (Nova) published a new repository. onlylabs watches repos for tooling, eval, infra, and model-adjacent work.","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"occurred_at":"2026-02-01T21:42:38+00:00","first_seen_at":"2026-06-05T20:58:37.464059+00:00","date_source":"source","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/amazon-science/SOP-Bench"]},"facets":{"repo":"amazon-science/SOP-Bench","language":"Python"},"traction":{"github_stars":21,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":null},"primary_evidence_page":{"is_primary":true,"source_match":true,"url":"https://github.com/amazon-science/SOP-Bench","final_url":"https://github.com/amazon-science/SOP-Bench","title":"amazon-science/SOP-Bench repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T02:53:09.34901+00:00","bytes":31350,"raw_path":"02948bd2b6bfe30089b41bf8716a212f4656017490262504a45866b1ca9e2d02.json","content_hash":"67c52b0d70dce3bd4dc1c3596f5e8dfe743ffee62804bcf86b2c6bf665b2d829","excerpt_chars":1200,"truncated":true,"excerpt":"amazon-science/SOP-Bench Language: Python License: NOASSERTION Stars: 21 Forks: 2 Open issues: 6 Created: 2026-02-01T21:42:38Z Pushed: 2026-06-06T00:04:59Z Default branch: main Fork: no Archived: no README: SOP-Bench : Complex Industrial SOPs for Evaluating LLM Agents [![Lint](https://github.com/amazon-science/SOP-Bench/actions/workflows/lint.yml/badge.svg)](https://github.com/amazon-science/SOP-Bench/actions/workflows/lint.yml) [![Tests](https://github.com/amazon-science/SOP-Bench/actions/workflows/test.yml/badge.svg)](https://github.com/amazon-science/SOP-Bench/actions/workflows/test.yml) Overview **SOP-Bench** is a comprehensive benchmark for evaluating LLM-based agents on complex, multi-step Standard Operating Procedures (SOPs) that are fundamental to industrial automation. Built from 2,000+ tasks across 12 industrial domains (healthcare, logistics, finance, content moderation, etc.), SOP-Bench addresses the gap between existing benchmarks and real-world procedural complexity. 🏭 **Human Expert-Authored SOPs** · 🤖 **Human-AI Collaborative Framework** · 📊 **Executable Interfaces** · 🔧 **Two Agent Architectures** · 📈 **11 Frontier Models Evaluated** News - **[2026-02]** 🎉..."},"evidence_pages":[],"related_signals":[{"id":"087c32a2-6ad0-4981-9315-11fdd32a0153","url":"https://onlylabs.fyi/signals/087c32a2-6ad0-4981-9315-11fdd32a0153","source_url":"https://github.com/amazon-science/reskill","title":"amazon-science/reskill","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"occurred_at":"2026-06-04T02:13:35+00:00","first_seen_at":"2026-06-05T20:58:37.464059+00:00","date_source":"source"},{"id":"e5701aed-6cd3-48dd-bfa6-ef839031e2e8","url":"https://onlylabs.fyi/signals/e5701aed-6cd3-48dd-bfa6-ef839031e2e8","source_url":"https://github.com/amazon-science/dualkv-flash-attn-for-rl","title":"amazon-science/dualkv-flash-attn-for-rl","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"occurred_at":"2026-05-27T17:38:58+00:00","first_seen_at":"2026-06-05T20:58:37.464059+00:00","date_source":"source"},{"id":"8af28f0c-7331-4b08-b517-e18b3555e503","url":"https://onlylabs.fyi/signals/8af28f0c-7331-4b08-b517-e18b3555e503","source_url":"https://github.com/amazon-science/EvoMAS","title":"amazon-science/EvoMAS","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"occurred_at":"2026-05-19T19:23:29+00:00","first_seen_at":"2026-06-05T20:58:37.464059+00:00","date_source":"source"},{"id":"e3ff8718-7daa-4ebd-a3e6-3d825c538b74","url":"https://onlylabs.fyi/signals/e3ff8718-7daa-4ebd-a3e6-3d825c538b74","source_url":"https://github.com/amazon-science/adaptive-layerwise-perturbation","title":"amazon-science/adaptive-layerwise-perturbation","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"occurred_at":"2026-05-14T17:44:17+00:00","first_seen_at":"2026-06-05T20:58:37.464059+00:00","date_source":"source"},{"id":"9afcd328-0124-485c-8ace-9c3ad546e316","url":"https://onlylabs.fyi/signals/9afcd328-0124-485c-8ace-9c3ad546e316","source_url":"https://github.com/amazon-science/temporal-reasoning-dataset","title":"amazon-science/temporal-reasoning-dataset","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"occurred_at":"2026-05-13T13:07:08+00:00","first_seen_at":"2026-06-05T20:58:37.464059+00:00","date_source":"source"},{"id":"e19ce80b-3d6a-4aaf-9b1a-82d1b19ab682","url":"https://onlylabs.fyi/signals/e19ce80b-3d6a-4aaf-9b1a-82d1b19ab682","source_url":"https://github.com/amazon-science/PROF-GRPO","title":"amazon-science/PROF-GRPO","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"occurred_at":"2026-05-12T19:43:55+00:00","first_seen_at":"2026-06-05T20:58:37.464059+00:00","date_source":"source"}]}