{"schema_version":"onlylabs.public_signal.v1","title":"Anthropic Writing: Towards Understanding Sycophancy In Language Models","description":"Anthropic writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/05f2bf54-b87f-4359-8475-ba5f3d22d47b","json_url":"https://onlylabs.fyi/signals/05f2bf54-b87f-4359-8475-ba5f3d22d47b/signal.json","generated_at":"2026-06-11T04:17:41.702397+00:00","org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/anthropic","dossier_json_url":"https://onlylabs.fyi/labs/anthropic/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/05f2bf54-b87f-4359-8475-ba5f3d22d47b","signal_json":"https://onlylabs.fyi/signals/05f2bf54-b87f-4359-8475-ba5f3d22d47b/signal.json","source":"https://www.anthropic.com/research/towards-understanding-sycophancy-in-language-models","lab_dossier":"https://onlylabs.fyi/labs/anthropic","lab_dossier_json":"https://onlylabs.fyi/labs/anthropic/dossier.json","analysis":"https://onlylabs.fyi/analysis/anthropic","analysis_json":"https://onlylabs.fyi/analysis/anthropic/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/anthropic/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":null},"answer_pack":{"answer":"Anthropic published Towards Understanding Sycophancy In Language Models. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Towards Understanding Sycophancy in Language Models \\ Anthropic Alignment Research Towards Understanding Sycophancy in Language Models Oct 23, 2023 Read Paper Abstract.... onlylabs links this event to 1 captured evidence page and 6 related writing signals.","signal_desk":"talking","source_context":{"source_url":"https://www.anthropic.com/research/towards-understanding-sycophancy-in-language-models","source_host":"anthropic.com","occurred_at":"2023-10-23T00:00:00.000Z","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"page.visible_date","context":null},"context_markers":[{"label":"Lab","value":"Anthropic","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"anthropic.com","source":"source"},{"label":"Watch term","value":"RL environments","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Safety and alignment","source":"evidence"},{"label":"Watch term","value":"Agents and tool use","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://www.anthropic.com/research/towards-understanding-sycophancy-in-language-models"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-11T04:17:41.702397+00:00"},"data_business":{"matches":false,"lanes":[],"matched_terms":[],"score":null,"reason":null},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/05f2bf54-b87f-4359-8475-ba5f3d22d47b/signal.json","dossier_json":"https://onlylabs.fyi/labs/anthropic/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/anthropic/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/anthropic/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":null,"opportunities_json":null},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/05f2bf54-b87f-4359-8475-ba5f3d22d47b/signal.json","required":true},{"label":"source","url":"https://www.anthropic.com/research/towards-understanding-sycophancy-in-language-models","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/anthropic/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/anthropic/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":null,"required":false}],"expected_output":["one-paragraph source-grounded interpretation","category-specific implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze Anthropic's writing signal \"Towards Understanding Sycophancy In Language Models\" for frontier lab strategy."},"semantic_triples":[{"subject":"Anthropic","predicate":"published","object":"Towards Understanding Sycophancy In Language Models","text":"Anthropic published Towards Understanding Sycophancy In Language Models."},{"subject":"Towards Understanding Sycophancy In Language Models","predicate":"is classified as","object":"writing signal","text":"Towards Understanding Sycophancy In Language Models is classified as writing signal."},{"subject":"Towards Understanding Sycophancy In Language Models","predicate":"belongs to","object":"talking desk","text":"Towards Understanding Sycophancy In Language Models belongs to talking desk."},{"subject":"Towards Understanding Sycophancy In Language Models","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Towards Understanding Sycophancy In Language Models has evidence coverage 1 captured evidence page."},{"subject":"Towards Understanding Sycophancy In Language Models","predicate":"has captured page count","object":"1","text":"Towards Understanding Sycophancy In Language Models has captured page count 1."},{"subject":"Towards Understanding Sycophancy In Language Models","predicate":"has readable page count","object":"1","text":"Towards Understanding Sycophancy In Language Models has readable page count 1."},{"subject":"Towards Understanding Sycophancy In Language Models","predicate":"has related signal count","object":"6","text":"Towards Understanding Sycophancy In Language Models has related signal count 6."},{"subject":"Towards Understanding Sycophancy In Language Models","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"Towards Understanding Sycophancy In Language Models has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.."},{"subject":"Towards Understanding Sycophancy In Language Models","predicate":"has source host","object":"anthropic.com","text":"Towards Understanding Sycophancy In Language Models has source host anthropic.com."},{"subject":"Towards Understanding Sycophancy In Language Models","predicate":"has lab","object":"Anthropic","text":"Towards Understanding Sycophancy In Language Models has lab Anthropic."},{"subject":"Towards Understanding Sycophancy In Language Models","predicate":"has signal desk","object":"talking","text":"Towards Understanding Sycophancy In Language Models has signal desk talking."},{"subject":"Towards Understanding Sycophancy In Language Models","predicate":"has source host","object":"anthropic.com","text":"Towards Understanding Sycophancy In Language Models has source host anthropic.com."},{"subject":"Towards Understanding Sycophancy In Language Models","predicate":"has watch term","object":"RL environments","text":"Towards Understanding Sycophancy In Language Models has watch term RL environments."},{"subject":"Towards Understanding Sycophancy In Language Models","predicate":"has watch term","object":"Infrastructure","text":"Towards Understanding Sycophancy In Language Models has watch term Infrastructure."},{"subject":"Towards Understanding Sycophancy In Language Models","predicate":"has watch term","object":"Safety and alignment","text":"Towards Understanding Sycophancy In Language Models has watch term Safety and alignment."},{"subject":"Towards Understanding Sycophancy In Language Models","predicate":"has watch term","object":"Agents and tool use","text":"Towards Understanding Sycophancy In Language Models has watch term Agents and tool use."}]},"intelligence":{"signal_desk":"talking","answer":"Anthropic published Towards Understanding Sycophancy In Language Models. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Towards Understanding Sycophancy in Language Models \\ Anthropic Alignment Research Towards Understanding Sycophancy in Language Models Oct 23, 2023 Read Paper Abstract.... onlylabs links this event to 1 captured evidence page and 6 related writing signals.","semantic_triples":[{"subject":"Anthropic","predicate":"published","object":"Towards Understanding Sycophancy In Language Models","text":"Anthropic published Towards Understanding Sycophancy In Language Models."},{"subject":"Towards Understanding Sycophancy In Language Models","predicate":"is classified as","object":"writing signal","text":"Towards Understanding Sycophancy In Language Models is classified as writing signal."},{"subject":"Towards Understanding Sycophancy In Language Models","predicate":"belongs to","object":"talking desk","text":"Towards Understanding Sycophancy In Language Models belongs to talking desk."},{"subject":"Towards Understanding Sycophancy In Language Models","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Towards Understanding Sycophancy In Language Models has evidence coverage 1 captured evidence page."}]},"signal":{"id":"05f2bf54-b87f-4359-8475-ba5f3d22d47b","url":"https://onlylabs.fyi/signals/05f2bf54-b87f-4359-8475-ba5f3d22d47b","json_url":"https://onlylabs.fyi/signals/05f2bf54-b87f-4359-8475-ba5f3d22d47b/signal.json","source_url":"https://www.anthropic.com/research/towards-understanding-sycophancy-in-language-models","title":"Towards Understanding Sycophancy In Language Models","summary":"Anthropic published a writing signal. onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2023-10-23T00:00:00.000Z","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"page.visible_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://www.anthropic.com/research/towards-understanding-sycophancy-in-language-models"]},"facets":{},"traction":{"github_stars":null,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":null},"primary_evidence_page":{"url":"https://www.anthropic.com/research/towards-understanding-sycophancy-in-language-models","final_url":"https://www.anthropic.com/research/towards-understanding-sycophancy-in-language-models","title":"Towards Understanding Sycophancy In Language Models","http_status":200,"content_type":"text/html; charset=utf-8","capture_method":"plain","fetched_at":"2026-06-11T04:17:41.702397+00:00","bytes":106256,"raw_path":"6aca063a1249c289c12938814b62c5fd0b54fe2d0c31e6db70e60b2c4ab8b506.html","content_hash":"d82dab5ebec65d5741247842a00a4b12f76075d7555f1dc1901d3821771abbfe","excerpt_chars":1200,"truncated":true,"excerpt":"Towards Understanding Sycophancy in Language Models \\ Anthropic Alignment Research Towards Understanding Sycophancy in Language Models Oct 23, 2023 Read Paper Abstract Reinforcement learning from human feedback (RLHF) is a popular technique for training high-quality AI assistants. However, RLHF may also encourage model responses that match user beliefs over truthful responses, a behavior known as sycophancy. We investigate the prevalence of sycophancy in RLHF-trained models and whether human preference judgments are responsible. We first demonstrate that five state-of-the-art AI assistants consistently exhibit sycophancy behavior across four varied free-form text-generation tasks. To understand if human preferences drive this broadly observed behavior of RLHF models, we analyze existing human preference data. We find that when a response matches a user’s views, it is more likely to be preferred. Moreover, both humans and preference models (PMs) prefer convincingly-written sycophantic responses over correct ones a non-negligible fraction of the time. Optimizing model outputs against PMs also sometimes sacrifices truthfulness in favor of sycophancy. Overall, our results indicate..."},"evidence_pages":[{"url":"https://www.anthropic.com/research/towards-understanding-sycophancy-in-language-models","final_url":"https://www.anthropic.com/research/towards-understanding-sycophancy-in-language-models","title":"Towards Understanding Sycophancy In Language Models","http_status":200,"content_type":"text/html; charset=utf-8","capture_method":"plain","fetched_at":"2026-06-11T04:17:41.702397+00:00","bytes":106256,"raw_path":"6aca063a1249c289c12938814b62c5fd0b54fe2d0c31e6db70e60b2c4ab8b506.html","content_hash":"d82dab5ebec65d5741247842a00a4b12f76075d7555f1dc1901d3821771abbfe","excerpt_chars":1200,"truncated":true,"excerpt":"Towards Understanding Sycophancy in Language Models \\ Anthropic Alignment Research Towards Understanding Sycophancy in Language Models Oct 23, 2023 Read Paper Abstract Reinforcement learning from human feedback (RLHF) is a popular technique for training high-quality AI assistants. However, RLHF may also encourage model responses that match user beliefs over truthful responses, a behavior known as sycophancy. We investigate the prevalence of sycophancy in RLHF-trained models and whether human preference judgments are responsible. We first demonstrate that five state-of-the-art AI assistants consistently exhibit sycophancy behavior across four varied free-form text-generation tasks. To understand if human preferences drive this broadly observed behavior of RLHF models, we analyze existing human preference data. We find that when a response matches a user’s views, it is more likely to be preferred. Moreover, both humans and preference models (PMs) prefer convincingly-written sycophantic responses over correct ones a non-negligible fraction of the time. Optimizing model outputs against PMs also sometimes sacrifices truthfulness in favor of sycophancy. Overall, our results indicate..."}],"related_signals":[{"id":"6c78c028-3ab4-4b33-86f7-d86c8ba9e3ba","url":"https://onlylabs.fyi/signals/6c78c028-3ab4-4b33-86f7-d86c8ba9e3ba","source_url":"https://www.anthropic.com/research/agents-in-biology","title":"Agents In Biology","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-10T15:16:01+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"2648db51-9d6a-42a9-aece-a0ca5f9ce64f","url":"https://onlylabs.fyi/signals/2648db51-9d6a-42a9-aece-a0ca5f9ce64f","source_url":"https://www.anthropic.com/news/claude-fable-5-mythos-5","title":"Claude Fable 5 Mythos 5","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-09T20:27:50+00:00","first_seen_at":"2026-06-10T07:01:05.666054+00:00","date_source":"sitemap.lastmod"},{"id":"8475487f-45b4-4689-9bc5-8e4c6ca0457d","url":"https://onlylabs.fyi/signals/8475487f-45b4-4689-9bc5-8e4c6ca0457d","source_url":"https://www.anthropic.com/engineering/how-we-contain-claude","title":"How We Contain Claude","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-06T00:28:16+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"e4fbfcdd-15b4-41b9-b011-fd83e7068ae9","url":"https://onlylabs.fyi/signals/e4fbfcdd-15b4-41b9-b011-fd83e7068ae9","source_url":"https://www.anthropic.com/research/making-claude-a-chemist","title":"Making Claude A Chemist","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T20:13:40+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"cc62deba-9682-4751-aa6b-81c3bd7122a0","url":"https://onlylabs.fyi/signals/cc62deba-9682-4751-aa6b-81c3bd7122a0","source_url":"https://www.anthropic.com/research/measuring-agent-autonomy","title":"Measuring Agent Autonomy","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T15:49:18+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"93da14fd-7141-4e17-abd6-1c8d52435c70","url":"https://onlylabs.fyi/signals/93da14fd-7141-4e17-abd6-1c8d52435c70","source_url":"https://www.anthropic.com/research/values-wild","title":"Values Wild","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T15:38:54+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"}]}