{"schema_version":"onlylabs.public_signal.v1","title":"OpenAI Writing: Learning from human preferences","description":"OpenAI writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/e84d143a-84d1-4e5d-83e8-712d4591cfbb","json_url":"https://onlylabs.fyi/signals/e84d143a-84d1-4e5d-83e8-712d4591cfbb/signal.json","generated_at":"2026-06-08T15:47:15.051+00:00","org":{"slug":"openai","name":"OpenAI","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/openai","dossier_json_url":"https://onlylabs.fyi/labs/openai/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/e84d143a-84d1-4e5d-83e8-712d4591cfbb","signal_json":"https://onlylabs.fyi/signals/e84d143a-84d1-4e5d-83e8-712d4591cfbb/signal.json","source":"https://openai.com/index/learning-from-human-preferences","lab_dossier":"https://onlylabs.fyi/labs/openai","lab_dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis":"https://onlylabs.fyi/analysis/openai","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"},{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety","json_url":"https://onlylabs.fyi/data-radar/safety/signals.json"}]}},"answer_pack":{"answer":"OpenAI published Learning from human preferences. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Learning from human preferences | OpenAI June 13, 2017 Learning from human preferences Loading… Share One step towards building safe AI systems is to remove the need for.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Infrastructure, Safety and policy in the data-business radar.","signal_desk":"talking","source_context":{"source_url":"https://openai.com/index/learning-from-human-preferences","source_host":"openai.com","occurred_at":"2017-06-13T07:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","context":null},"context_markers":[{"label":"Lab","value":"OpenAI","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"openai.com","source":"source"},{"label":"Radar lane","value":"Infrastructure","source":"radar"},{"label":"Radar lane","value":"Safety and policy","source":"radar"},{"label":"Matched term","value":"systems","source":"radar"},{"label":"Matched term","value":"safety","source":"radar"},{"label":"Watch term","value":"RL environments","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Safety and alignment","source":"evidence"},{"label":"Watch term","value":"Agents and tool use","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/learning-from-human-preferences"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-08T15:47:15.051+00:00"},"data_business":{"matches":true,"lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"},{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety","json_url":"https://onlylabs.fyi/data-radar/safety/signals.json"}],"matched_terms":["systems","safety"],"score":25,"reason":"OpenAI has a writing signal matching infrastructure, safety and policy."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/e84d143a-84d1-4e5d-83e8-712d4591cfbb/signal.json","dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which data-business lane explains this signal: Infrastructure, Safety and policy?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/e84d143a-84d1-4e5d-83e8-712d4591cfbb/signal.json","required":true},{"label":"source","url":"https://openai.com/index/learning-from-human-preferences","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/openai/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/openai/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze OpenAI's writing signal \"Learning from human preferences\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Learning from human preferences","text":"OpenAI published Learning from human preferences."},{"subject":"Learning from human preferences","predicate":"is classified as","object":"writing signal","text":"Learning from human preferences is classified as writing signal."},{"subject":"Learning from human preferences","predicate":"belongs to","object":"talking desk","text":"Learning from human preferences belongs to talking desk."},{"subject":"Learning from human preferences","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Learning from human preferences has evidence coverage 1 captured evidence page."},{"subject":"Learning from human preferences","predicate":"matches data-business lanes","object":"Infrastructure, Safety and policy","text":"Learning from human preferences matches data-business lanes Infrastructure, Safety and policy."},{"subject":"Learning from human preferences","predicate":"has captured page count","object":"1","text":"Learning from human preferences has captured page count 1."},{"subject":"Learning from human preferences","predicate":"has readable page count","object":"1","text":"Learning from human preferences has readable page count 1."},{"subject":"Learning from human preferences","predicate":"has related signal count","object":"6","text":"Learning from human preferences has related signal count 6."},{"subject":"Learning from human preferences","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"Learning from human preferences has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.."},{"subject":"Learning from human preferences","predicate":"has source host","object":"openai.com","text":"Learning from human preferences has source host openai.com."},{"subject":"Learning from human preferences","predicate":"has lab","object":"OpenAI","text":"Learning from human preferences has lab OpenAI."},{"subject":"Learning from human preferences","predicate":"has signal desk","object":"talking","text":"Learning from human preferences has signal desk talking."},{"subject":"Learning from human preferences","predicate":"has source host","object":"openai.com","text":"Learning from human preferences has source host openai.com."},{"subject":"Learning from human preferences","predicate":"has radar lane","object":"Infrastructure","text":"Learning from human preferences has radar lane Infrastructure."},{"subject":"Learning from human preferences","predicate":"has radar lane","object":"Safety and policy","text":"Learning from human preferences has radar lane Safety and policy."},{"subject":"Learning from human preferences","predicate":"has matched term","object":"systems","text":"Learning from human preferences has matched term systems."},{"subject":"Learning from human preferences","predicate":"has matched term","object":"safety","text":"Learning from human preferences has matched term safety."},{"subject":"Learning from human preferences","predicate":"has watch term","object":"RL environments","text":"Learning from human preferences has watch term RL environments."}]},"intelligence":{"signal_desk":"talking","answer":"OpenAI published Learning from human preferences. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Learning from human preferences | OpenAI June 13, 2017 Learning from human preferences Loading… Share One step towards building safe AI systems is to remove the need for.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Infrastructure, Safety and policy in the data-business radar.","semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Learning from human preferences","text":"OpenAI published Learning from human preferences."},{"subject":"Learning from human preferences","predicate":"is classified as","object":"writing signal","text":"Learning from human preferences is classified as writing signal."},{"subject":"Learning from human preferences","predicate":"belongs to","object":"talking desk","text":"Learning from human preferences belongs to talking desk."},{"subject":"Learning from human preferences","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Learning from human preferences has evidence coverage 1 captured evidence page."},{"subject":"Learning from human preferences","predicate":"matches data-business lanes","object":"Infrastructure, Safety and policy","text":"Learning from human preferences matches data-business lanes Infrastructure, Safety and policy."}]},"signal":{"id":"e84d143a-84d1-4e5d-83e8-712d4591cfbb","url":"https://onlylabs.fyi/signals/e84d143a-84d1-4e5d-83e8-712d4591cfbb","json_url":"https://onlylabs.fyi/signals/e84d143a-84d1-4e5d-83e8-712d4591cfbb/signal.json","source_url":"https://openai.com/index/learning-from-human-preferences","title":"Learning from human preferences","summary":"OpenAI published a writing signal. onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2017-06-13T07:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/learning-from-human-preferences"]},"facets":{},"traction":{"github_stars":null,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure"},{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety"}],"score":25,"matched_terms":["systems","safety"],"reason":"OpenAI has a writing signal matching infrastructure, safety and policy."}},"primary_evidence_page":{"url":"https://openai.com/index/learning-from-human-preferences","final_url":"https://openai.com/index/learning-from-human-preferences","title":"Learning from human preferences","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:47:15.051+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Learning from human preferences | OpenAI June 13, 2017 Learning from human preferences Loading… Share One step towards building safe AI systems is to remove the need for humans to write goal functions, since using a simple proxy for a complex goal, or getting the complex goal a bit wrong, can lead to undesirable and even dangerous behavior. In collaboration with DeepMind’s safety team, we’ve developed an algorithm which can infer what humans want by being told which of two proposed behaviors is better. We present a learning algorithm that uses small amounts of human feedback to solve modern RL environments. Machine learning systems with human feedback have⁠ been⁠ explored⁠ before⁠, but we’ve scaled up the approach to be able to work on much more complicated tasks. Our algorithm needed 900 bits of feedback from a human evaluator to learn to backflip—a seemingly simple task which is simple to judge but challenging⁠ to specify. Our algorithm learned to backflip using around 900 individual bits of feedback from the human evaluator. The overall training process is a 3-step feedback cycle between the human, the agent’s understanding of the goal, and the RL training. Our AI agent starts..."},"evidence_pages":[{"url":"https://openai.com/index/learning-from-human-preferences","final_url":"https://openai.com/index/learning-from-human-preferences","title":"Learning from human preferences","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:47:15.051+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Learning from human preferences | OpenAI June 13, 2017 Learning from human preferences Loading… Share One step towards building safe AI systems is to remove the need for humans to write goal functions, since using a simple proxy for a complex goal, or getting the complex goal a bit wrong, can lead to undesirable and even dangerous behavior. In collaboration with DeepMind’s safety team, we’ve developed an algorithm which can infer what humans want by being told which of two proposed behaviors is better. We present a learning algorithm that uses small amounts of human feedback to solve modern RL environments. Machine learning systems with human feedback have⁠ been⁠ explored⁠ before⁠, but we’ve scaled up the approach to be able to work on much more complicated tasks. Our algorithm needed 900 bits of feedback from a human evaluator to learn to backflip—a seemingly simple task which is simple to judge but challenging⁠ to specify. Our algorithm learned to backflip using around 900 individual bits of feedback from the human evaluator. The overall training process is a 3-step feedback cycle between the human, the agent’s understanding of the goal, and the RL training. Our AI agent starts..."}],"related_signals":[{"id":"b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","url":"https://onlylabs.fyi/signals/b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","source_url":"https://openai.com/index/supporting-eu-trustworthy-ai-ecosystem","title":"Supporting Europe’s work in ensuring a trustworthy AI ecosystem ","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T08:00:56.140796+00:00","date_source":"rss.item_date"},{"id":"2638c0a7-b372-409c-ac72-f6d81d6464dc","url":"https://onlylabs.fyi/signals/2638c0a7-b372-409c-ac72-f6d81d6464dc","source_url":"https://openai.com/index/using-codex-to-simulate-black-holes","title":"How an astrophysicist uses Codex to help simulate black holes","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"509ea784-51ec-4ede-855b-5a4d1b27d3be","url":"https://onlylabs.fyi/signals/509ea784-51ec-4ede-855b-5a4d1b27d3be","source_url":"https://openai.com/index/openai-on-oracle-cloud","title":"Access OpenAI models and Codex through your Oracle cloud commitment","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T20:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4f051449-87f2-466e-941e-b5918381a8fe","url":"https://onlylabs.fyi/signals/4f051449-87f2-466e-941e-b5918381a8fe","source_url":"https://openai.com/index/prc-linked-influence-operations-ai-debates","title":"PRC-linked influence operations are targeting AI debates in the US","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T12:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","url":"https://onlylabs.fyi/signals/4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","source_url":"https://openai.com/index/lseg","title":"From data to decisions: how LSEG is scaling trusted AI","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T00:00:00+00:00","first_seen_at":"2026-06-10T09:18:54.26094+00:00","date_source":"rss.item_date"},{"id":"fb16aa7a-c4ef-4859-b514-0839c2f1330d","url":"https://onlylabs.fyi/signals/fb16aa7a-c4ef-4859-b514-0839c2f1330d","source_url":"https://openai.com/index/nextdoor","title":"How engineers at Nextdoor use Codex to build without limits","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-09T12:00:00+00:00","first_seen_at":"2026-06-10T07:01:28.700378+00:00","date_source":"rss.item_date"}]}