{"schema_version":"onlylabs.public_signal.v1","title":"OpenAI Writing: Scaling laws for reward model overoptimization","description":"OpenAI writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/e1076d0c-223a-4bb2-8b37-c35d741406ad","json_url":"https://onlylabs.fyi/signals/e1076d0c-223a-4bb2-8b37-c35d741406ad/signal.json","generated_at":"2026-06-08T15:46:45.683+00:00","org":{"slug":"openai","name":"OpenAI","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/openai","dossier_json_url":"https://onlylabs.fyi/labs/openai/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/e1076d0c-223a-4bb2-8b37-c35d741406ad","signal_json":"https://onlylabs.fyi/signals/e1076d0c-223a-4bb2-8b37-c35d741406ad/signal.json","source":"https://openai.com/index/scaling-laws-for-reward-model-overoptimization","lab_dossier":"https://onlylabs.fyi/labs/openai","lab_dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis":"https://onlylabs.fyi/analysis/openai","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"}]}},"answer_pack":{"answer":"OpenAI published Scaling laws for reward model overoptimization. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Scaling laws for reward model overoptimization | OpenAI October 19, 2022 Scaling laws for reward model overoptimization Loading… Share Abstract In reinforcement learning.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Infrastructure in the data-business radar.","signal_desk":"talking","source_context":{"source_url":"https://openai.com/index/scaling-laws-for-reward-model-overoptimization","source_host":"openai.com","occurred_at":"2022-10-19T07:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","context":null},"context_markers":[{"label":"Lab","value":"OpenAI","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"openai.com","source":"source"},{"label":"Radar lane","value":"Infrastructure","source":"radar"},{"label":"Matched term","value":"scaling","source":"radar"},{"label":"Watch term","value":"RL environments","source":"evidence"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Safety and alignment","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/scaling-laws-for-reward-model-overoptimization"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-08T15:46:45.683+00:00"},"data_business":{"matches":true,"lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"}],"matched_terms":["scaling"],"score":13,"reason":"OpenAI has a writing signal matching infrastructure."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/e1076d0c-223a-4bb2-8b37-c35d741406ad/signal.json","dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which data-business lane explains this signal: Infrastructure?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/e1076d0c-223a-4bb2-8b37-c35d741406ad/signal.json","required":true},{"label":"source","url":"https://openai.com/index/scaling-laws-for-reward-model-overoptimization","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/openai/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/openai/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze OpenAI's writing signal \"Scaling laws for reward model overoptimization\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Scaling laws for reward model overoptimization","text":"OpenAI published Scaling laws for reward model overoptimization."},{"subject":"Scaling laws for reward model overoptimization","predicate":"is classified as","object":"writing signal","text":"Scaling laws for reward model overoptimization is classified as writing signal."},{"subject":"Scaling laws for reward model overoptimization","predicate":"belongs to","object":"talking desk","text":"Scaling laws for reward model overoptimization belongs to talking desk."},{"subject":"Scaling laws for reward model overoptimization","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Scaling laws for reward model overoptimization has evidence coverage 1 captured evidence page."},{"subject":"Scaling laws for reward model overoptimization","predicate":"matches data-business lanes","object":"Infrastructure","text":"Scaling laws for reward model overoptimization matches data-business lanes Infrastructure."},{"subject":"Scaling laws for reward model overoptimization","predicate":"has captured page count","object":"1","text":"Scaling laws for reward model overoptimization has captured page count 1."},{"subject":"Scaling laws for reward model overoptimization","predicate":"has readable page count","object":"1","text":"Scaling laws for reward model overoptimization has readable page count 1."},{"subject":"Scaling laws for reward model overoptimization","predicate":"has related signal count","object":"6","text":"Scaling laws for reward model overoptimization has related signal count 6."},{"subject":"Scaling laws for reward model overoptimization","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"Scaling laws for reward model overoptimization has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.."},{"subject":"Scaling laws for reward model overoptimization","predicate":"has source host","object":"openai.com","text":"Scaling laws for reward model overoptimization has source host openai.com."},{"subject":"Scaling laws for reward model overoptimization","predicate":"has lab","object":"OpenAI","text":"Scaling laws for reward model overoptimization has lab OpenAI."},{"subject":"Scaling laws for reward model overoptimization","predicate":"has signal desk","object":"talking","text":"Scaling laws for reward model overoptimization has signal desk talking."},{"subject":"Scaling laws for reward model overoptimization","predicate":"has source host","object":"openai.com","text":"Scaling laws for reward model overoptimization has source host openai.com."},{"subject":"Scaling laws for reward model overoptimization","predicate":"has radar lane","object":"Infrastructure","text":"Scaling laws for reward model overoptimization has radar lane Infrastructure."},{"subject":"Scaling laws for reward model overoptimization","predicate":"has matched term","object":"scaling","text":"Scaling laws for reward model overoptimization has matched term scaling."},{"subject":"Scaling laws for reward model overoptimization","predicate":"has watch term","object":"RL environments","text":"Scaling laws for reward model overoptimization has watch term RL environments."},{"subject":"Scaling laws for reward model overoptimization","predicate":"has watch term","object":"Data pipeline","text":"Scaling laws for reward model overoptimization has watch term Data pipeline."},{"subject":"Scaling laws for reward model overoptimization","predicate":"has watch term","object":"Infrastructure","text":"Scaling laws for reward model overoptimization has watch term Infrastructure."}]},"intelligence":{"signal_desk":"talking","answer":"OpenAI published Scaling laws for reward model overoptimization. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Scaling laws for reward model overoptimization | OpenAI October 19, 2022 Scaling laws for reward model overoptimization Loading… Share Abstract In reinforcement learning.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Infrastructure in the data-business radar.","semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Scaling laws for reward model overoptimization","text":"OpenAI published Scaling laws for reward model overoptimization."},{"subject":"Scaling laws for reward model overoptimization","predicate":"is classified as","object":"writing signal","text":"Scaling laws for reward model overoptimization is classified as writing signal."},{"subject":"Scaling laws for reward model overoptimization","predicate":"belongs to","object":"talking desk","text":"Scaling laws for reward model overoptimization belongs to talking desk."},{"subject":"Scaling laws for reward model overoptimization","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Scaling laws for reward model overoptimization has evidence coverage 1 captured evidence page."},{"subject":"Scaling laws for reward model overoptimization","predicate":"matches data-business lanes","object":"Infrastructure","text":"Scaling laws for reward model overoptimization matches data-business lanes Infrastructure."}]},"signal":{"id":"e1076d0c-223a-4bb2-8b37-c35d741406ad","url":"https://onlylabs.fyi/signals/e1076d0c-223a-4bb2-8b37-c35d741406ad","json_url":"https://onlylabs.fyi/signals/e1076d0c-223a-4bb2-8b37-c35d741406ad/signal.json","source_url":"https://openai.com/index/scaling-laws-for-reward-model-overoptimization","title":"Scaling laws for reward model overoptimization","summary":"OpenAI published a writing signal. onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2022-10-19T07:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/scaling-laws-for-reward-model-overoptimization"]},"facets":{},"traction":{"github_stars":null,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure"}],"score":13,"matched_terms":["scaling"],"reason":"OpenAI has a writing signal matching infrastructure."}},"primary_evidence_page":{"url":"https://openai.com/index/scaling-laws-for-reward-model-overoptimization","final_url":"https://openai.com/index/scaling-laws-for-reward-model-overoptimization","title":"Scaling laws for reward model overoptimization","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:46:45.683+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Scaling laws for reward model overoptimization | OpenAI October 19, 2022 Scaling laws for reward model overoptimization Loading… Share Abstract In reinforcement learning from human feedback, it is common to optimize against a reward model trained to predict human preferences. Because the reward model is an imperfect proxy, optimizing its value too much can hinder ground truth performance, in accordance with Goodhart's law. This effect has been frequently observed, but not carefully measured due to the expense of collecting human preference data. In this work, we use a synthetic setup in which a fixed \"gold-standard\" reward model plays the role of humans, providing labels used to train a proxy reward model. We study how the gold reward model score changes as we optimize against the proxy reward model using either reinforcement learning or best-of-n sampling. We find that this relationship follows a different functional form depending on the method of optimization, and that in both cases its coefficients scale smoothly with the number of reward model parameters. We also study the effect on this relationship of the size of the reward model dataset, the number of reward model and..."},"evidence_pages":[{"url":"https://openai.com/index/scaling-laws-for-reward-model-overoptimization","final_url":"https://openai.com/index/scaling-laws-for-reward-model-overoptimization","title":"Scaling laws for reward model overoptimization","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:46:45.683+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Scaling laws for reward model overoptimization | OpenAI October 19, 2022 Scaling laws for reward model overoptimization Loading… Share Abstract In reinforcement learning from human feedback, it is common to optimize against a reward model trained to predict human preferences. Because the reward model is an imperfect proxy, optimizing its value too much can hinder ground truth performance, in accordance with Goodhart's law. This effect has been frequently observed, but not carefully measured due to the expense of collecting human preference data. In this work, we use a synthetic setup in which a fixed \"gold-standard\" reward model plays the role of humans, providing labels used to train a proxy reward model. We study how the gold reward model score changes as we optimize against the proxy reward model using either reinforcement learning or best-of-n sampling. We find that this relationship follows a different functional form depending on the method of optimization, and that in both cases its coefficients scale smoothly with the number of reward model parameters. We also study the effect on this relationship of the size of the reward model dataset, the number of reward model and..."}],"related_signals":[{"id":"b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","url":"https://onlylabs.fyi/signals/b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","source_url":"https://openai.com/index/supporting-eu-trustworthy-ai-ecosystem","title":"Supporting Europe’s work in ensuring a trustworthy AI ecosystem ","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T08:00:56.140796+00:00","date_source":"rss.item_date"},{"id":"2638c0a7-b372-409c-ac72-f6d81d6464dc","url":"https://onlylabs.fyi/signals/2638c0a7-b372-409c-ac72-f6d81d6464dc","source_url":"https://openai.com/index/using-codex-to-simulate-black-holes","title":"How an astrophysicist uses Codex to help simulate black holes","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"509ea784-51ec-4ede-855b-5a4d1b27d3be","url":"https://onlylabs.fyi/signals/509ea784-51ec-4ede-855b-5a4d1b27d3be","source_url":"https://openai.com/index/openai-on-oracle-cloud","title":"Access OpenAI models and Codex through your Oracle cloud commitment","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T20:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4f051449-87f2-466e-941e-b5918381a8fe","url":"https://onlylabs.fyi/signals/4f051449-87f2-466e-941e-b5918381a8fe","source_url":"https://openai.com/index/prc-linked-influence-operations-ai-debates","title":"PRC-linked influence operations are targeting AI debates in the US","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T12:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","url":"https://onlylabs.fyi/signals/4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","source_url":"https://openai.com/index/lseg","title":"From data to decisions: how LSEG is scaling trusted AI","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T00:00:00+00:00","first_seen_at":"2026-06-10T09:18:54.26094+00:00","date_source":"rss.item_date"},{"id":"fb16aa7a-c4ef-4859-b514-0839c2f1330d","url":"https://onlylabs.fyi/signals/fb16aa7a-c4ef-4859-b514-0839c2f1330d","source_url":"https://openai.com/index/nextdoor","title":"How engineers at Nextdoor use Codex to build without limits","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-09T12:00:00+00:00","first_seen_at":"2026-06-10T07:01:28.700378+00:00","date_source":"rss.item_date"}]}