{"schema_version":"onlylabs.public_signal.v1","title":"Anthropic Writing: Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","description":"Anthropic writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/edcac1d8-6ab2-47af-9b6e-82247094e16a","json_url":"https://onlylabs.fyi/signals/edcac1d8-6ab2-47af-9b6e-82247094e16a/signal.json","generated_at":"2026-06-11T04:17:41.557793+00:00","org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/anthropic","dossier_json_url":"https://onlylabs.fyi/labs/anthropic/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/edcac1d8-6ab2-47af-9b6e-82247094e16a","signal_json":"https://onlylabs.fyi/signals/edcac1d8-6ab2-47af-9b6e-82247094e16a/signal.json","source":"https://www.anthropic.com/research/training-a-helpful-and-harmless-assistant-with-reinforcement-learning-from-human-feedback","lab_dossier":"https://onlylabs.fyi/labs/anthropic","lab_dossier_json":"https://onlylabs.fyi/labs/anthropic/dossier.json","analysis":"https://onlylabs.fyi/analysis/anthropic","analysis_json":"https://onlylabs.fyi/analysis/anthropic/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/anthropic/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"}]}},"answer_pack":{"answer":"Anthropic published Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Training a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback \\ Anthropic Alignment Research Training a Helpful and Harmless Assistant with.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Infrastructure in the data-business radar.","signal_desk":"talking","source_context":{"source_url":"https://www.anthropic.com/research/training-a-helpful-and-harmless-assistant-with-reinforcement-learning-from-human-feedback","source_host":"anthropic.com","occurred_at":"2022-04-12T00:00:00.000Z","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"page.visible_date","context":null},"context_markers":[{"label":"Lab","value":"Anthropic","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"anthropic.com","source":"source"},{"label":"Radar lane","value":"Infrastructure","source":"radar"},{"label":"Matched term","value":"training","source":"radar"},{"label":"Watch term","value":"RL environments","source":"evidence"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Safety and alignment","source":"evidence"},{"label":"Watch term","value":"Agents and tool use","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://www.anthropic.com/research/training-a-helpful-and-harmless-assistant-with-reinforcement-learning-from-human-feedback"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-11T04:17:41.557793+00:00"},"data_business":{"matches":true,"lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"}],"matched_terms":["training"],"score":13,"reason":"Anthropic has a writing signal matching infrastructure."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/edcac1d8-6ab2-47af-9b6e-82247094e16a/signal.json","dossier_json":"https://onlylabs.fyi/labs/anthropic/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/anthropic/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/anthropic/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which data-business lane explains this signal: Infrastructure?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/edcac1d8-6ab2-47af-9b6e-82247094e16a/signal.json","required":true},{"label":"source","url":"https://www.anthropic.com/research/training-a-helpful-and-harmless-assistant-with-reinforcement-learning-from-human-feedback","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/anthropic/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/anthropic/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze Anthropic's writing signal \"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"Anthropic","predicate":"published","object":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","text":"Anthropic published Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"is classified as","object":"writing signal","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback is classified as writing signal."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"belongs to","object":"talking desk","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback belongs to talking desk."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback has evidence coverage 1 captured evidence page."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"matches data-business lanes","object":"Infrastructure","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback matches data-business lanes Infrastructure."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"has captured page count","object":"1","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback has captured page count 1."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"has readable page count","object":"1","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback has readable page count 1."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"has related signal count","object":"6","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback has related signal count 6."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"has source host","object":"anthropic.com","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback has source host anthropic.com."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"has lab","object":"Anthropic","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback has lab Anthropic."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"has signal desk","object":"talking","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback has signal desk talking."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"has source host","object":"anthropic.com","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback has source host anthropic.com."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"has radar lane","object":"Infrastructure","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback has radar lane Infrastructure."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"has matched term","object":"training","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback has matched term training."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"has watch term","object":"RL environments","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback has watch term RL environments."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"has watch term","object":"Eval methodology","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback has watch term Eval methodology."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"has watch term","object":"Data pipeline","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback has watch term Data pipeline."}]},"intelligence":{"signal_desk":"talking","answer":"Anthropic published Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Training a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback \\ Anthropic Alignment Research Training a Helpful and Harmless Assistant with.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Infrastructure in the data-business radar.","semantic_triples":[{"subject":"Anthropic","predicate":"published","object":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","text":"Anthropic published Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"is classified as","object":"writing signal","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback is classified as writing signal."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"belongs to","object":"talking desk","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback belongs to talking desk."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback has evidence coverage 1 captured evidence page."},{"subject":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","predicate":"matches data-business lanes","object":"Infrastructure","text":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback matches data-business lanes Infrastructure."}]},"signal":{"id":"edcac1d8-6ab2-47af-9b6e-82247094e16a","url":"https://onlylabs.fyi/signals/edcac1d8-6ab2-47af-9b6e-82247094e16a","json_url":"https://onlylabs.fyi/signals/edcac1d8-6ab2-47af-9b6e-82247094e16a/signal.json","source_url":"https://www.anthropic.com/research/training-a-helpful-and-harmless-assistant-with-reinforcement-learning-from-human-feedback","title":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","summary":"Anthropic published a writing signal. onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2022-04-12T00:00:00.000Z","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"page.visible_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://www.anthropic.com/research/training-a-helpful-and-harmless-assistant-with-reinforcement-learning-from-human-feedback"]},"facets":{},"traction":{"github_stars":null,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure"}],"score":13,"matched_terms":["training"],"reason":"Anthropic has a writing signal matching infrastructure."}},"primary_evidence_page":{"url":"https://www.anthropic.com/research/training-a-helpful-and-harmless-assistant-with-reinforcement-learning-from-human-feedback","final_url":"https://www.anthropic.com/research/training-a-helpful-and-harmless-assistant-with-reinforcement-learning-from-human-feedback","title":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","http_status":200,"content_type":"text/html; charset=utf-8","capture_method":"plain","fetched_at":"2026-06-11T04:17:41.557793+00:00","bytes":107132,"raw_path":"01919702dd963f27f9f6a74d82107d39af66e536cd6410ecdee1ae57075c9ed2.html","content_hash":"dad8a667794e6df0b113e010aec9928408d13f2d896bf5effc9b662cb3954155","excerpt_chars":1200,"truncated":true,"excerpt":"Training a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback \\ Anthropic Alignment Research Training a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback Apr 12, 2022 Read Paper Abstract We apply preference modeling and reinforcement learning from human feedback (RLHF) to finetune language models to act as helpful and harmless assistants. We find this alignment training improves performance on almost all NLP evaluations, and is fully compatible with training for specialized skills such as python coding and summarization. We explore an iterated online mode of training, where preference models and RL policies are updated on a weekly cadence with fresh human feedback data, efficiently improving our datasets and models. Finally, we investigate the robustness of RLHF training, and identify a roughly linear relation between the RL reward and the square root of the KL divergence between the policy and its initialization. Alongside our main results, we perform peripheral analyses on calibration, competing objectives, and the use of OOD detection, compare our models with human writers, and provide samples from our models using..."},"evidence_pages":[{"url":"https://www.anthropic.com/research/training-a-helpful-and-harmless-assistant-with-reinforcement-learning-from-human-feedback","final_url":"https://www.anthropic.com/research/training-a-helpful-and-harmless-assistant-with-reinforcement-learning-from-human-feedback","title":"Training A Helpful And Harmless Assistant With Reinforcement Learning From Human Feedback","http_status":200,"content_type":"text/html; charset=utf-8","capture_method":"plain","fetched_at":"2026-06-11T04:17:41.557793+00:00","bytes":107132,"raw_path":"01919702dd963f27f9f6a74d82107d39af66e536cd6410ecdee1ae57075c9ed2.html","content_hash":"dad8a667794e6df0b113e010aec9928408d13f2d896bf5effc9b662cb3954155","excerpt_chars":1200,"truncated":true,"excerpt":"Training a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback \\ Anthropic Alignment Research Training a Helpful and Harmless Assistant with Reinforcement Learning from Human Feedback Apr 12, 2022 Read Paper Abstract We apply preference modeling and reinforcement learning from human feedback (RLHF) to finetune language models to act as helpful and harmless assistants. We find this alignment training improves performance on almost all NLP evaluations, and is fully compatible with training for specialized skills such as python coding and summarization. We explore an iterated online mode of training, where preference models and RL policies are updated on a weekly cadence with fresh human feedback data, efficiently improving our datasets and models. Finally, we investigate the robustness of RLHF training, and identify a roughly linear relation between the RL reward and the square root of the KL divergence between the policy and its initialization. Alongside our main results, we perform peripheral analyses on calibration, competing objectives, and the use of OOD detection, compare our models with human writers, and provide samples from our models using..."}],"related_signals":[{"id":"6c78c028-3ab4-4b33-86f7-d86c8ba9e3ba","url":"https://onlylabs.fyi/signals/6c78c028-3ab4-4b33-86f7-d86c8ba9e3ba","source_url":"https://www.anthropic.com/research/agents-in-biology","title":"Agents In Biology","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-10T15:16:01+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"2648db51-9d6a-42a9-aece-a0ca5f9ce64f","url":"https://onlylabs.fyi/signals/2648db51-9d6a-42a9-aece-a0ca5f9ce64f","source_url":"https://www.anthropic.com/news/claude-fable-5-mythos-5","title":"Claude Fable 5 Mythos 5","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-09T20:27:50+00:00","first_seen_at":"2026-06-10T07:01:05.666054+00:00","date_source":"sitemap.lastmod"},{"id":"8475487f-45b4-4689-9bc5-8e4c6ca0457d","url":"https://onlylabs.fyi/signals/8475487f-45b4-4689-9bc5-8e4c6ca0457d","source_url":"https://www.anthropic.com/engineering/how-we-contain-claude","title":"How We Contain Claude","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-06T00:28:16+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"e4fbfcdd-15b4-41b9-b011-fd83e7068ae9","url":"https://onlylabs.fyi/signals/e4fbfcdd-15b4-41b9-b011-fd83e7068ae9","source_url":"https://www.anthropic.com/research/making-claude-a-chemist","title":"Making Claude A Chemist","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T20:13:40+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"cc62deba-9682-4751-aa6b-81c3bd7122a0","url":"https://onlylabs.fyi/signals/cc62deba-9682-4751-aa6b-81c3bd7122a0","source_url":"https://www.anthropic.com/research/measuring-agent-autonomy","title":"Measuring Agent Autonomy","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T15:49:18+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"93da14fd-7141-4e17-abd6-1c8d52435c70","url":"https://onlylabs.fyi/signals/93da14fd-7141-4e17-abd6-1c8d52435c70","source_url":"https://www.anthropic.com/research/values-wild","title":"Values Wild","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T15:38:54+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"}]}