{"schema_version":"onlylabs.public_signal.v1","title":"Anthropic Writing: Alignment Faking","description":"Anthropic writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/15ca0d20-b3da-4c4c-b84a-d6741f856096","json_url":"https://onlylabs.fyi/signals/15ca0d20-b3da-4c4c-b84a-d6741f856096/signal.json","generated_at":"2026-06-11T04:15:25.549314+00:00","org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/anthropic","dossier_json_url":"https://onlylabs.fyi/labs/anthropic/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/15ca0d20-b3da-4c4c-b84a-d6741f856096","signal_json":"https://onlylabs.fyi/signals/15ca0d20-b3da-4c4c-b84a-d6741f856096/signal.json","source":"https://www.anthropic.com/research/alignment-faking","lab_dossier":"https://onlylabs.fyi/labs/anthropic","lab_dossier_json":"https://onlylabs.fyi/labs/anthropic/dossier.json","analysis":"https://onlylabs.fyi/analysis/anthropic","analysis_json":"https://onlylabs.fyi/analysis/anthropic/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/anthropic/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety","json_url":"https://onlylabs.fyi/data-radar/safety/signals.json"}]}},"answer_pack":{"answer":"Anthropic published Alignment Faking. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Alignment faking in large language models \\ Anthropic Alignment Alignment faking in large language models Dec 18, 2024 Read the paper Most of us have encountered.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Safety and policy in the data-business radar.","signal_desk":"talking","source_context":{"source_url":"https://www.anthropic.com/research/alignment-faking","source_host":"anthropic.com","occurred_at":"2024-12-18T00:00:00.000Z","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"page.visible_date","context":null},"context_markers":[{"label":"Lab","value":"Anthropic","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"anthropic.com","source":"source"},{"label":"Radar lane","value":"Safety and policy","source":"radar"},{"label":"Matched term","value":"alignment","source":"radar"},{"label":"Watch term","value":"RL environments","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Safety and alignment","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://www.anthropic.com/research/alignment-faking"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-11T04:15:25.549314+00:00"},"data_business":{"matches":true,"lanes":[{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety","json_url":"https://onlylabs.fyi/data-radar/safety/signals.json"}],"matched_terms":["alignment"],"score":13,"reason":"Anthropic has a writing signal matching safety and policy."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/15ca0d20-b3da-4c4c-b84a-d6741f856096/signal.json","dossier_json":"https://onlylabs.fyi/labs/anthropic/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/anthropic/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/anthropic/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which data-business lane explains this signal: Safety and policy?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/15ca0d20-b3da-4c4c-b84a-d6741f856096/signal.json","required":true},{"label":"source","url":"https://www.anthropic.com/research/alignment-faking","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/anthropic/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/anthropic/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze Anthropic's writing signal \"Alignment Faking\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"Anthropic","predicate":"published","object":"Alignment Faking","text":"Anthropic published Alignment Faking."},{"subject":"Alignment Faking","predicate":"is classified as","object":"writing signal","text":"Alignment Faking is classified as writing signal."},{"subject":"Alignment Faking","predicate":"belongs to","object":"talking desk","text":"Alignment Faking belongs to talking desk."},{"subject":"Alignment Faking","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Alignment Faking has evidence coverage 1 captured evidence page."},{"subject":"Alignment Faking","predicate":"matches data-business lanes","object":"Safety and policy","text":"Alignment Faking matches data-business lanes Safety and policy."},{"subject":"Alignment Faking","predicate":"has captured page count","object":"1","text":"Alignment Faking has captured page count 1."},{"subject":"Alignment Faking","predicate":"has readable page count","object":"1","text":"Alignment Faking has readable page count 1."},{"subject":"Alignment Faking","predicate":"has related signal count","object":"6","text":"Alignment Faking has related signal count 6."},{"subject":"Alignment Faking","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"Alignment Faking has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.."},{"subject":"Alignment Faking","predicate":"has source host","object":"anthropic.com","text":"Alignment Faking has source host anthropic.com."},{"subject":"Alignment Faking","predicate":"has lab","object":"Anthropic","text":"Alignment Faking has lab Anthropic."},{"subject":"Alignment Faking","predicate":"has signal desk","object":"talking","text":"Alignment Faking has signal desk talking."},{"subject":"Alignment Faking","predicate":"has source host","object":"anthropic.com","text":"Alignment Faking has source host anthropic.com."},{"subject":"Alignment Faking","predicate":"has radar lane","object":"Safety and policy","text":"Alignment Faking has radar lane Safety and policy."},{"subject":"Alignment Faking","predicate":"has matched term","object":"alignment","text":"Alignment Faking has matched term alignment."},{"subject":"Alignment Faking","predicate":"has watch term","object":"RL environments","text":"Alignment Faking has watch term RL environments."},{"subject":"Alignment Faking","predicate":"has watch term","object":"Infrastructure","text":"Alignment Faking has watch term Infrastructure."},{"subject":"Alignment Faking","predicate":"has watch term","object":"Safety and alignment","text":"Alignment Faking has watch term Safety and alignment."}]},"intelligence":{"signal_desk":"talking","answer":"Anthropic published Alignment Faking. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Alignment faking in large language models \\ Anthropic Alignment Alignment faking in large language models Dec 18, 2024 Read the paper Most of us have encountered.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Safety and policy in the data-business radar.","semantic_triples":[{"subject":"Anthropic","predicate":"published","object":"Alignment Faking","text":"Anthropic published Alignment Faking."},{"subject":"Alignment Faking","predicate":"is classified as","object":"writing signal","text":"Alignment Faking is classified as writing signal."},{"subject":"Alignment Faking","predicate":"belongs to","object":"talking desk","text":"Alignment Faking belongs to talking desk."},{"subject":"Alignment Faking","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Alignment Faking has evidence coverage 1 captured evidence page."},{"subject":"Alignment Faking","predicate":"matches data-business lanes","object":"Safety and policy","text":"Alignment Faking matches data-business lanes Safety and policy."}]},"signal":{"id":"15ca0d20-b3da-4c4c-b84a-d6741f856096","url":"https://onlylabs.fyi/signals/15ca0d20-b3da-4c4c-b84a-d6741f856096","json_url":"https://onlylabs.fyi/signals/15ca0d20-b3da-4c4c-b84a-d6741f856096/signal.json","source_url":"https://www.anthropic.com/research/alignment-faking","title":"Alignment Faking","summary":"Anthropic published a writing signal. onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2024-12-18T00:00:00.000Z","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"page.visible_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://www.anthropic.com/research/alignment-faking"]},"facets":{},"traction":{"github_stars":null,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety"}],"score":13,"matched_terms":["alignment"],"reason":"Anthropic has a writing signal matching safety and policy."}},"primary_evidence_page":{"url":"https://www.anthropic.com/research/alignment-faking","final_url":"https://www.anthropic.com/research/alignment-faking","title":"Alignment Faking","http_status":200,"content_type":"text/html; charset=utf-8","capture_method":"plain","fetched_at":"2026-06-11T04:15:25.549314+00:00","bytes":154405,"raw_path":"c2cfd72baafd64a9812693a497d820927a2703c27722525b514e61467df19717.html","content_hash":"1349fb456cd3a6a39b39e031ebfec961f25b69cb90e8dadafb6ba8c14953b149","excerpt_chars":1200,"truncated":true,"excerpt":"Alignment faking in large language models \\ Anthropic Alignment Alignment faking in large language models Dec 18, 2024 Read the paper Most of us have encountered situations where someone appears to share our views or values, but is in fact only pretending to do so—a behavior that we might call “alignment faking”. Alignment faking occurs in literature: Consider the character of Iago in Shakespeare’s Othello , who acts as if he’s the eponymous character’s loyal friend while subverting and undermining him. It occurs in real life: Consider a politician who claims to support a particular cause in order to get elected, only to drop it as soon as they’re in office. Could AI models also display alignment faking? When models are trained using reinforcement learning, they’re rewarded for outputs that accord with certain pre-determined principles. But what if a model, via its prior training, has principles or preferences that conflict with what’s later rewarded in reinforcement learning? Imagine, for example, a model that learned early in training to adopt a partisan slant, but which is later trained to be politically neutral. In such a situation, a sophisticated enough model might “play..."},"evidence_pages":[{"url":"https://www.anthropic.com/research/alignment-faking","final_url":"https://www.anthropic.com/research/alignment-faking","title":"Alignment Faking","http_status":200,"content_type":"text/html; charset=utf-8","capture_method":"plain","fetched_at":"2026-06-11T04:15:25.549314+00:00","bytes":154405,"raw_path":"c2cfd72baafd64a9812693a497d820927a2703c27722525b514e61467df19717.html","content_hash":"1349fb456cd3a6a39b39e031ebfec961f25b69cb90e8dadafb6ba8c14953b149","excerpt_chars":1200,"truncated":true,"excerpt":"Alignment faking in large language models \\ Anthropic Alignment Alignment faking in large language models Dec 18, 2024 Read the paper Most of us have encountered situations where someone appears to share our views or values, but is in fact only pretending to do so—a behavior that we might call “alignment faking”. Alignment faking occurs in literature: Consider the character of Iago in Shakespeare’s Othello , who acts as if he’s the eponymous character’s loyal friend while subverting and undermining him. It occurs in real life: Consider a politician who claims to support a particular cause in order to get elected, only to drop it as soon as they’re in office. Could AI models also display alignment faking? When models are trained using reinforcement learning, they’re rewarded for outputs that accord with certain pre-determined principles. But what if a model, via its prior training, has principles or preferences that conflict with what’s later rewarded in reinforcement learning? Imagine, for example, a model that learned early in training to adopt a partisan slant, but which is later trained to be politically neutral. In such a situation, a sophisticated enough model might “play..."}],"related_signals":[{"id":"6c78c028-3ab4-4b33-86f7-d86c8ba9e3ba","url":"https://onlylabs.fyi/signals/6c78c028-3ab4-4b33-86f7-d86c8ba9e3ba","source_url":"https://www.anthropic.com/research/agents-in-biology","title":"Agents In Biology","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-10T15:16:01+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"2648db51-9d6a-42a9-aece-a0ca5f9ce64f","url":"https://onlylabs.fyi/signals/2648db51-9d6a-42a9-aece-a0ca5f9ce64f","source_url":"https://www.anthropic.com/news/claude-fable-5-mythos-5","title":"Claude Fable 5 Mythos 5","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-09T20:27:50+00:00","first_seen_at":"2026-06-10T07:01:05.666054+00:00","date_source":"sitemap.lastmod"},{"id":"8475487f-45b4-4689-9bc5-8e4c6ca0457d","url":"https://onlylabs.fyi/signals/8475487f-45b4-4689-9bc5-8e4c6ca0457d","source_url":"https://www.anthropic.com/engineering/how-we-contain-claude","title":"How We Contain Claude","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-06T00:28:16+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"e4fbfcdd-15b4-41b9-b011-fd83e7068ae9","url":"https://onlylabs.fyi/signals/e4fbfcdd-15b4-41b9-b011-fd83e7068ae9","source_url":"https://www.anthropic.com/research/making-claude-a-chemist","title":"Making Claude A Chemist","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T20:13:40+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"cc62deba-9682-4751-aa6b-81c3bd7122a0","url":"https://onlylabs.fyi/signals/cc62deba-9682-4751-aa6b-81c3bd7122a0","source_url":"https://www.anthropic.com/research/measuring-agent-autonomy","title":"Measuring Agent Autonomy","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T15:49:18+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"93da14fd-7141-4e17-abd6-1c8d52435c70","url":"https://onlylabs.fyi/signals/93da14fd-7141-4e17-abd6-1c8d52435c70","source_url":"https://www.anthropic.com/research/values-wild","title":"Values Wild","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T15:38:54+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"}]}