{"schema_version":"onlylabs.public_signal.v1","title":"Anthropic Writing: Sabotage Evaluations","description":"Anthropic writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/89fe0be4-a577-4fc2-ad3d-e21e2b148e33","json_url":"https://onlylabs.fyi/signals/89fe0be4-a577-4fc2-ad3d-e21e2b148e33/signal.json","generated_at":"2026-06-11T04:18:20.357415+00:00","org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/anthropic","dossier_json_url":"https://onlylabs.fyi/labs/anthropic/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/89fe0be4-a577-4fc2-ad3d-e21e2b148e33","signal_json":"https://onlylabs.fyi/signals/89fe0be4-a577-4fc2-ad3d-e21e2b148e33/signal.json","source":"https://www.anthropic.com/research/sabotage-evaluations","lab_dossier":"https://onlylabs.fyi/labs/anthropic","lab_dossier_json":"https://onlylabs.fyi/labs/anthropic/dossier.json","analysis":"https://onlylabs.fyi/analysis/anthropic","analysis_json":"https://onlylabs.fyi/analysis/anthropic/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/anthropic/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"evals","label":"Evals and 
quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}]}},"answer_pack":{"answer":"Anthropic published Sabotage Evaluations. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Skepticism about AI safety research; seen as unnecessary or a marketing ploy. · Sabotage evaluations for frontier models \\ Anthropic Alignment Sabotage evaluations for frontier models Oct 18, 2024 Read the paper Any industry where there are.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Evals and quality in the data-business radar.","signal_desk":"talking","source_context":{"source_url":"https://www.anthropic.com/research/sabotage-evaluations","source_host":"anthropic.com","occurred_at":"2024-10-18T17:54:45+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod","context":null},"context_markers":[{"label":"Lab","value":"Anthropic","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"anthropic.com","source":"source"},{"label":"HN","value":"Skepticism about AI safety research; seen as unnecessary or a marketing ploy.","source":"source"},{"label":"Radar lane","value":"Evals and quality","source":"radar"},{"label":"Matched term","value":"eval","source":"radar"},{"label":"Matched term","value":"evaluation","source":"radar"},{"label":"Watch term","value":"RL environments","source":"evidence"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Safety and 
alignment","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://www.anthropic.com/research/sabotage-evaluations"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-11T04:18:20.357415+00:00"},"data_business":{"matches":true,"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}],"matched_terms":["eval","evaluation"],"score":17,"reason":"Anthropic has a writing signal matching evals and quality."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/89fe0be4-a577-4fc2-ad3d-e21e2b148e33/signal.json","dossier_json":"https://onlylabs.fyi/labs/anthropic/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/anthropic/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/anthropic/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch 
framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which data-business lane explains this signal: Evals and quality?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/89fe0be4-a577-4fc2-ad3d-e21e2b148e33/signal.json","required":true},{"label":"source","url":"https://www.anthropic.com/research/sabotage-evaluations","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/anthropic/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/anthropic/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze Anthropic's writing signal \"Sabotage Evaluations\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"Anthropic","predicate":"published","object":"Sabotage Evaluations","text":"Anthropic published Sabotage Evaluations."},{"subject":"Sabotage Evaluations","predicate":"is classified as","object":"writing signal","text":"Sabotage Evaluations is classified as writing signal."},{"subject":"Sabotage Evaluations","predicate":"belongs to","object":"talking 
desk","text":"Sabotage Evaluations belongs to talking desk."},{"subject":"Sabotage Evaluations","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Sabotage Evaluations has evidence coverage 1 captured evidence page."},{"subject":"Sabotage Evaluations","predicate":"matches data-business lanes","object":"Evals and quality","text":"Sabotage Evaluations matches data-business lanes Evals and quality."},{"subject":"Sabotage Evaluations","predicate":"has captured page count","object":"1","text":"Sabotage Evaluations has captured page count 1."},{"subject":"Sabotage Evaluations","predicate":"has readable page count","object":"1","text":"Sabotage Evaluations has readable page count 1."},{"subject":"Sabotage Evaluations","predicate":"has related signal count","object":"6","text":"Sabotage Evaluations has related signal count 6."},{"subject":"Sabotage Evaluations","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"Sabotage Evaluations has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention."},{"subject":"Sabotage Evaluations","predicate":"has source host","object":"anthropic.com","text":"Sabotage Evaluations has source host anthropic.com."},{"subject":"Sabotage Evaluations","predicate":"has lab","object":"Anthropic","text":"Sabotage Evaluations has lab Anthropic."},{"subject":"Sabotage Evaluations","predicate":"has signal desk","object":"talking","text":"Sabotage Evaluations has signal desk talking."},{"subject":"Sabotage Evaluations","predicate":"has source host","object":"anthropic.com","text":"Sabotage Evaluations has source host anthropic.com."},{"subject":"Sabotage Evaluations","predicate":"has hn","object":"Skepticism about AI safety research; seen as 
unnecessary or a marketing ploy.","text":"Sabotage Evaluations has hn Skepticism about AI safety research; seen as unnecessary or a marketing ploy."},{"subject":"Sabotage Evaluations","predicate":"has radar lane","object":"Evals and quality","text":"Sabotage Evaluations has radar lane Evals and quality."},{"subject":"Sabotage Evaluations","predicate":"has matched term","object":"eval","text":"Sabotage Evaluations has matched term eval."},{"subject":"Sabotage Evaluations","predicate":"has matched term","object":"evaluation","text":"Sabotage Evaluations has matched term evaluation."},{"subject":"Sabotage Evaluations","predicate":"has watch term","object":"RL environments","text":"Sabotage Evaluations has watch term RL environments."}]},"intelligence":{"signal_desk":"talking","answer":"Anthropic published Sabotage Evaluations. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Skepticism about AI safety research; seen as unnecessary or a marketing ploy. · Sabotage evaluations for frontier models \\ Anthropic Alignment Sabotage evaluations for frontier models Oct 18, 2024 Read the paper Any industry where there are.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. 
It also maps to Evals and quality in the data-business radar.","semantic_triples":[{"subject":"Anthropic","predicate":"published","object":"Sabotage Evaluations","text":"Anthropic published Sabotage Evaluations."},{"subject":"Sabotage Evaluations","predicate":"is classified as","object":"writing signal","text":"Sabotage Evaluations is classified as writing signal."},{"subject":"Sabotage Evaluations","predicate":"belongs to","object":"talking desk","text":"Sabotage Evaluations belongs to talking desk."},{"subject":"Sabotage Evaluations","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Sabotage Evaluations has evidence coverage 1 captured evidence page."},{"subject":"Sabotage Evaluations","predicate":"matches data-business lanes","object":"Evals and quality","text":"Sabotage Evaluations matches data-business lanes Evals and quality."}]},"signal":{"id":"89fe0be4-a577-4fc2-ad3d-e21e2b148e33","url":"https://onlylabs.fyi/signals/89fe0be4-a577-4fc2-ad3d-e21e2b148e33","json_url":"https://onlylabs.fyi/signals/89fe0be4-a577-4fc2-ad3d-e21e2b148e33/signal.json","source_url":"https://www.anthropic.com/research/sabotage-evaluations","title":"Sabotage Evaluations","summary":"Anthropic published a writing signal. 
onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2024-10-18T17:54:45+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://www.anthropic.com/research/sabotage-evaluations"]},"facets":{},"traction":{"github_stars":null,"hn_points":64,"hn_comments":8,"hn_story_id":"41895024","hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals"}],"score":17,"matched_terms":["eval","evaluation"],"reason":"Anthropic has a writing signal matching evals and quality."}},"primary_evidence_page":{"url":"https://www.anthropic.com/research/sabotage-evaluations","final_url":"https://www.anthropic.com/research/sabotage-evaluations","title":"Sabotage Evaluations","http_status":200,"content_type":"text/html; charset=utf-8","capture_method":"plain","fetched_at":"2026-06-11T04:18:20.357415+00:00","bytes":142205,"raw_path":"9d653677d03c2df3fb4355a397eecad12a2ed840c7b6a15f93178a36c1b0da0a.html","content_hash":"f5182b9b09923020a8ad629e70f6a6f049d8a5a78bc1d7331d03cd015b908bbb","excerpt_chars":1200,"truncated":true,"excerpt":"Sabotage evaluations for frontier models \\ Anthropic Alignment Sabotage evaluations for frontier models Oct 18, 2024 Read the paper Any industry where there are potential harms needs evaluations. Nuclear power stations have continuous radiation monitoring and regular site inspections; new aircraft undergo extensive flight tests to prove their airworthiness. It’s no different for AI systems. 
New AI models go through a wide range of safety evaluations—for example, testing their capacity to assist in the creation of biological or chemical weapons. Such evaluations are built into our Responsible Scaling Policy , which guides our development of a model’s safeguards. As AIs become more capable, however, a new kind of risk might emerge: models with the ability to mislead their users, or subvert the systems we put in place to oversee them. A new paper by the Anthropic Alignment Science team describes a novel set of evaluations that test a model’s capacity for sabotage . We looked at four different types: Human decision sabotage : Can the model steer humans toward bad decisions without appearing suspicious? Code sabotage : Can the model insert subtle bugs into codebases over time without..."},"evidence_pages":[{"url":"https://www.anthropic.com/research/sabotage-evaluations","final_url":"https://www.anthropic.com/research/sabotage-evaluations","title":"Sabotage Evaluations","http_status":200,"content_type":"text/html; charset=utf-8","capture_method":"plain","fetched_at":"2026-06-11T04:18:20.357415+00:00","bytes":142205,"raw_path":"9d653677d03c2df3fb4355a397eecad12a2ed840c7b6a15f93178a36c1b0da0a.html","content_hash":"f5182b9b09923020a8ad629e70f6a6f049d8a5a78bc1d7331d03cd015b908bbb","excerpt_chars":1200,"truncated":true,"excerpt":"Sabotage evaluations for frontier models \\ Anthropic Alignment Sabotage evaluations for frontier models Oct 18, 2024 Read the paper Any industry where there are potential harms needs evaluations. Nuclear power stations have continuous radiation monitoring and regular site inspections; new aircraft undergo extensive flight tests to prove their airworthiness. It’s no different for AI systems. New AI models go through a wide range of safety evaluations—for example, testing their capacity to assist in the creation of biological or chemical weapons. 
Such evaluations are built into our Responsible Scaling Policy , which guides our development of a model’s safeguards. As AIs become more capable, however, a new kind of risk might emerge: models with the ability to mislead their users, or subvert the systems we put in place to oversee them. A new paper by the Anthropic Alignment Science team describes a novel set of evaluations that test a model’s capacity for sabotage . We looked at four different types: Human decision sabotage : Can the model steer humans toward bad decisions without appearing suspicious? Code sabotage : Can the model insert subtle bugs into codebases over time without..."}],"related_signals":[{"id":"6c78c028-3ab4-4b33-86f7-d86c8ba9e3ba","url":"https://onlylabs.fyi/signals/6c78c028-3ab4-4b33-86f7-d86c8ba9e3ba","source_url":"https://www.anthropic.com/research/agents-in-biology","title":"Agents In Biology","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-10T15:16:01+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"2648db51-9d6a-42a9-aece-a0ca5f9ce64f","url":"https://onlylabs.fyi/signals/2648db51-9d6a-42a9-aece-a0ca5f9ce64f","source_url":"https://www.anthropic.com/news/claude-fable-5-mythos-5","title":"Claude Fable 5 Mythos 5","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-09T20:27:50+00:00","first_seen_at":"2026-06-10T07:01:05.666054+00:00","date_source":"sitemap.lastmod"},{"id":"8475487f-45b4-4689-9bc5-8e4c6ca0457d","url":"https://onlylabs.fyi/signals/8475487f-45b4-4689-9bc5-8e4c6ca0457d","source_url":"https://www.anthropic.com/engineering/how-we-contain-claude","title":"How We Contain 
Claude","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-06T00:28:16+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"e4fbfcdd-15b4-41b9-b011-fd83e7068ae9","url":"https://onlylabs.fyi/signals/e4fbfcdd-15b4-41b9-b011-fd83e7068ae9","source_url":"https://www.anthropic.com/research/making-claude-a-chemist","title":"Making Claude A Chemist","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T20:13:40+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"cc62deba-9682-4751-aa6b-81c3bd7122a0","url":"https://onlylabs.fyi/signals/cc62deba-9682-4751-aa6b-81c3bd7122a0","source_url":"https://www.anthropic.com/research/measuring-agent-autonomy","title":"Measuring Agent Autonomy","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T15:49:18+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"93da14fd-7141-4e17-abd6-1c8d52435c70","url":"https://onlylabs.fyi/signals/93da14fd-7141-4e17-abd6-1c8d52435c70","source_url":"https://www.anthropic.com/research/values-wild","title":"Values Wild","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T15:38:54+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"}]}