{"schema_version":"onlylabs.public_signal.v1","title":"OpenAI Writing: Variance reduction for policy gradient with action-dependent factorized baselines","description":"OpenAI writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/676d6da5-f712-4016-908f-9aed72a544f7","json_url":"https://onlylabs.fyi/signals/676d6da5-f712-4016-908f-9aed72a544f7/signal.json","generated_at":"2026-06-08T15:47:05.463+00:00","org":{"slug":"openai","name":"OpenAI","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/openai","dossier_json_url":"https://onlylabs.fyi/labs/openai/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/676d6da5-f712-4016-908f-9aed72a544f7","signal_json":"https://onlylabs.fyi/signals/676d6da5-f712-4016-908f-9aed72a544f7/signal.json","source":"https://openai.com/index/variance-reduction-for-policy-gradient-with-action-dependent-factorized-baselines","lab_dossier":"https://onlylabs.fyi/labs/openai","lab_dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis":"https://onlylabs.fyi/analysis/openai","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety","json_url":"https://onlylabs.fyi/data-radar/safety/signals.json"}]}},"answer_pack":{"answer":"OpenAI published Variance reduction for policy gradient with action-dependent factorized baselines. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Variance reduction for policy gradient with action-dependent factorized baselines | OpenAI March 20, 2018 Variance reduction for policy gradient with action-dependent.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Safety and policy in the data-business radar.","signal_desk":"talking","source_context":{"source_url":"https://openai.com/index/variance-reduction-for-policy-gradient-with-action-dependent-factorized-baselines","source_host":"openai.com","occurred_at":"2018-03-20T07:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","context":null},"context_markers":[{"label":"Lab","value":"OpenAI","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"openai.com","source":"source"},{"label":"Radar lane","value":"Safety and policy","source":"radar"},{"label":"Matched term","value":"policy","source":"radar"},{"label":"Watch term","value":"RL environments","source":"evidence"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Safety and alignment","source":"evidence"},{"label":"Watch term","value":"Agents and tool use","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/variance-reduction-for-policy-gradient-with-action-dependent-factorized-baselines"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-08T15:47:05.463+00:00"},"data_business":{"matches":true,"lanes":[{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety","json_url":"https://onlylabs.fyi/data-radar/safety/signals.json"}],"matched_terms":["policy"],"score":13,"reason":"OpenAI has a writing signal matching safety and policy."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/676d6da5-f712-4016-908f-9aed72a544f7/signal.json","dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which data-business lane explains this signal: Safety and policy?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/676d6da5-f712-4016-908f-9aed72a544f7/signal.json","required":true},{"label":"source","url":"https://openai.com/index/variance-reduction-for-policy-gradient-with-action-dependent-factorized-baselines","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/openai/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/openai/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze OpenAI's writing signal \"Variance reduction for policy gradient with action-dependent factorized baselines\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Variance reduction for policy gradient with action-dependent factorized baselines","text":"OpenAI published Variance reduction for policy gradient with action-dependent factorized baselines."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"is classified as","object":"writing signal","text":"Variance reduction for policy gradient with action-dependent factorized baselines is classified as writing signal."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"belongs to","object":"talking desk","text":"Variance reduction for policy gradient with action-dependent factorized baselines belongs to talking desk."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Variance reduction for policy gradient with action-dependent factorized baselines has evidence coverage 1 captured evidence page."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"matches data-business lanes","object":"Safety and policy","text":"Variance reduction for policy gradient with action-dependent factorized baselines matches data-business lanes Safety and policy."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"has captured page count","object":"1","text":"Variance reduction for policy gradient with action-dependent factorized baselines has captured page count 1."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"has readable page count","object":"1","text":"Variance reduction for policy gradient with action-dependent factorized baselines has readable page count 1."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"has related signal count","object":"6","text":"Variance reduction for policy gradient with action-dependent factorized baselines has related signal count 6."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"Variance reduction for policy gradient with action-dependent factorized baselines has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"has source host","object":"openai.com","text":"Variance reduction for policy gradient with action-dependent factorized baselines has source host openai.com."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"has lab","object":"OpenAI","text":"Variance reduction for policy gradient with action-dependent factorized baselines has lab OpenAI."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"has signal desk","object":"talking","text":"Variance reduction for policy gradient with action-dependent factorized baselines has signal desk talking."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"has source host","object":"openai.com","text":"Variance reduction for policy gradient with action-dependent factorized baselines has source host openai.com."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"has radar lane","object":"Safety and policy","text":"Variance reduction for policy gradient with action-dependent factorized baselines has radar lane Safety and policy."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"has matched term","object":"policy","text":"Variance reduction for policy gradient with action-dependent factorized baselines has matched term policy."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"has watch term","object":"RL environments","text":"Variance reduction for policy gradient with action-dependent factorized baselines has watch term RL environments."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"has watch term","object":"Eval methodology","text":"Variance reduction for policy gradient with action-dependent factorized baselines has watch term Eval methodology."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"has watch term","object":"Infrastructure","text":"Variance reduction for policy gradient with action-dependent factorized baselines has watch term Infrastructure."}]},"intelligence":{"signal_desk":"talking","answer":"OpenAI published Variance reduction for policy gradient with action-dependent factorized baselines. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Variance reduction for policy gradient with action-dependent factorized baselines | OpenAI March 20, 2018 Variance reduction for policy gradient with action-dependent.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Safety and policy in the data-business radar.","semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Variance reduction for policy gradient with action-dependent factorized baselines","text":"OpenAI published Variance reduction for policy gradient with action-dependent factorized baselines."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"is classified as","object":"writing signal","text":"Variance reduction for policy gradient with action-dependent factorized baselines is classified as writing signal."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"belongs to","object":"talking desk","text":"Variance reduction for policy gradient with action-dependent factorized baselines belongs to talking desk."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Variance reduction for policy gradient with action-dependent factorized baselines has evidence coverage 1 captured evidence page."},{"subject":"Variance reduction for policy gradient with action-dependent factorized baselines","predicate":"matches data-business lanes","object":"Safety and policy","text":"Variance reduction for policy gradient with action-dependent factorized baselines matches data-business lanes Safety and policy."}]},"signal":{"id":"676d6da5-f712-4016-908f-9aed72a544f7","url":"https://onlylabs.fyi/signals/676d6da5-f712-4016-908f-9aed72a544f7","json_url":"https://onlylabs.fyi/signals/676d6da5-f712-4016-908f-9aed72a544f7/signal.json","source_url":"https://openai.com/index/variance-reduction-for-policy-gradient-with-action-dependent-factorized-baselines","title":"Variance reduction for policy gradient with action-dependent factorized baselines","summary":"OpenAI published a writing signal. onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2018-03-20T07:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/variance-reduction-for-policy-gradient-with-action-dependent-factorized-baselines"]},"facets":{},"traction":{"github_stars":null,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety"}],"score":13,"matched_terms":["policy"],"reason":"OpenAI has a writing signal matching safety and policy."}},"primary_evidence_page":{"url":"https://openai.com/index/variance-reduction-for-policy-gradient-with-action-dependent-factorized-baselines","final_url":"https://openai.com/index/variance-reduction-for-policy-gradient-with-action-dependent-factorized-baselines","title":"Variance reduction for policy gradient with action-dependent factorized baselines","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:47:05.463+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Variance reduction for policy gradient with action-dependent factorized baselines | OpenAI March 20, 2018 Variance reduction for policy gradient with action-dependent factorized baselines Loading… Share Abstract Policy gradient methods have enjoyed great success in deep reinforcement learning but suffer from high variance of gradient estimates. The high variance problem is particularly exasperated in problems with long horizons or high-dimensional action spaces. To mitigate this issue, we derive a bias-free action-dependent baseline for variance reduction which fully exploits the structural form of the stochastic policy itself and does not make any additional assumptions about the MDP. We demonstrate and quantify the benefit of the action-dependent baseline through both theoretical analysis as well as numerical results, including an analysis of the suboptimality of the optimal state-dependent baseline. The result is a computationally efficient policy gradient algorithm, which scales to high-dimensional control problems, as demonstrated by a synthetic 2000-dimensional target matching task. Our experimental results indicate that action-dependent baselines allow for faster learning..."},"evidence_pages":[{"url":"https://openai.com/index/variance-reduction-for-policy-gradient-with-action-dependent-factorized-baselines","final_url":"https://openai.com/index/variance-reduction-for-policy-gradient-with-action-dependent-factorized-baselines","title":"Variance reduction for policy gradient with action-dependent factorized baselines","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:47:05.463+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Variance reduction for policy gradient with action-dependent factorized baselines | OpenAI March 20, 2018 Variance reduction for policy gradient with action-dependent factorized baselines Loading… Share Abstract Policy gradient methods have enjoyed great success in deep reinforcement learning but suffer from high variance of gradient estimates. The high variance problem is particularly exasperated in problems with long horizons or high-dimensional action spaces. To mitigate this issue, we derive a bias-free action-dependent baseline for variance reduction which fully exploits the structural form of the stochastic policy itself and does not make any additional assumptions about the MDP. We demonstrate and quantify the benefit of the action-dependent baseline through both theoretical analysis as well as numerical results, including an analysis of the suboptimality of the optimal state-dependent baseline. The result is a computationally efficient policy gradient algorithm, which scales to high-dimensional control problems, as demonstrated by a synthetic 2000-dimensional target matching task. Our experimental results indicate that action-dependent baselines allow for faster learning..."}],"related_signals":[{"id":"b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","url":"https://onlylabs.fyi/signals/b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","source_url":"https://openai.com/index/supporting-eu-trustworthy-ai-ecosystem","title":"Supporting Europe’s work in ensuring a trustworthy AI ecosystem ","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T08:00:56.140796+00:00","date_source":"rss.item_date"},{"id":"2638c0a7-b372-409c-ac72-f6d81d6464dc","url":"https://onlylabs.fyi/signals/2638c0a7-b372-409c-ac72-f6d81d6464dc","source_url":"https://openai.com/index/using-codex-to-simulate-black-holes","title":"How an astrophysicist uses Codex to help simulate black holes","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"509ea784-51ec-4ede-855b-5a4d1b27d3be","url":"https://onlylabs.fyi/signals/509ea784-51ec-4ede-855b-5a4d1b27d3be","source_url":"https://openai.com/index/openai-on-oracle-cloud","title":"Access OpenAI models and Codex through your Oracle cloud commitment","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T20:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4f051449-87f2-466e-941e-b5918381a8fe","url":"https://onlylabs.fyi/signals/4f051449-87f2-466e-941e-b5918381a8fe","source_url":"https://openai.com/index/prc-linked-influence-operations-ai-debates","title":"PRC-linked influence operations are targeting AI debates in the US","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T12:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","url":"https://onlylabs.fyi/signals/4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","source_url":"https://openai.com/index/lseg","title":"From data to decisions: how LSEG is scaling trusted AI","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T00:00:00+00:00","first_seen_at":"2026-06-10T09:18:54.26094+00:00","date_source":"rss.item_date"},{"id":"fb16aa7a-c4ef-4859-b514-0839c2f1330d","url":"https://onlylabs.fyi/signals/fb16aa7a-c4ef-4859-b514-0839c2f1330d","source_url":"https://openai.com/index/nextdoor","title":"How engineers at Nextdoor use Codex to build without limits","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-09T12:00:00+00:00","first_seen_at":"2026-06-10T07:01:28.700378+00:00","date_source":"rss.item_date"}]}