{"schema_version":"onlylabs.public_signal.v1","title":"OpenAI Writing: Evolved Policy Gradients","description":"OpenAI writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/11177376-2b0d-4817-b7d9-795e30252c6a","json_url":"https://onlylabs.fyi/signals/11177376-2b0d-4817-b7d9-795e30252c6a/signal.json","generated_at":"2026-06-08T15:47:05.334+00:00","org":{"slug":"openai","name":"OpenAI","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/openai","dossier_json_url":"https://onlylabs.fyi/labs/openai/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/11177376-2b0d-4817-b7d9-795e30252c6a","signal_json":"https://onlylabs.fyi/signals/11177376-2b0d-4817-b7d9-795e30252c6a/signal.json","source":"https://openai.com/index/evolved-policy-gradients","lab_dossier":"https://onlylabs.fyi/labs/openai","lab_dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis":"https://onlylabs.fyi/analysis/openai","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"},{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety","json_url":"https://onlylabs.fyi/data-radar/safety/signals.json"}]}},"answer_pack":{"answer":"OpenAI published Evolved Policy Gradients. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Evolved Policy Gradients | OpenAI April 18, 2018 Milestone Evolved Policy Gradients Read paper View code Loading… Share We’re releasing an experimental metalearning.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Infrastructure, Safety and policy in the data-business radar.","signal_desk":"talking","source_context":{"source_url":"https://openai.com/index/evolved-policy-gradients","source_host":"openai.com","occurred_at":"2018-04-18T07:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","context":null},"context_markers":[{"label":"Lab","value":"OpenAI","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"openai.com","source":"source"},{"label":"Radar lane","value":"Infrastructure","source":"radar"},{"label":"Radar lane","value":"Safety and policy","source":"radar"},{"label":"Matched term","value":"training","source":"radar"},{"label":"Matched term","value":"policy","source":"radar"},{"label":"Watch term","value":"RL environments","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Safety and alignment","source":"evidence"},{"label":"Watch term","value":"Agents and tool use","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/evolved-policy-gradients"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-08T15:47:05.334+00:00"},"data_business":{"matches":true,"lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"},{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety","json_url":"https://onlylabs.fyi/data-radar/safety/signals.json"}],"matched_terms":["training","policy"],"score":25,"reason":"OpenAI has a writing signal matching infrastructure, safety and policy."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/11177376-2b0d-4817-b7d9-795e30252c6a/signal.json","dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which data-business lane explains this signal: Infrastructure, Safety and policy?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/11177376-2b0d-4817-b7d9-795e30252c6a/signal.json","required":true},{"label":"source","url":"https://openai.com/index/evolved-policy-gradients","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/openai/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/openai/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze OpenAI's writing signal \"Evolved Policy Gradients\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Evolved Policy Gradients","text":"OpenAI published Evolved Policy Gradients."},{"subject":"Evolved Policy Gradients","predicate":"is classified as","object":"writing signal","text":"Evolved Policy Gradients is classified as writing signal."},{"subject":"Evolved Policy Gradients","predicate":"belongs to","object":"talking desk","text":"Evolved Policy Gradients belongs to talking desk."},{"subject":"Evolved Policy Gradients","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Evolved Policy Gradients has evidence coverage 1 captured evidence page."},{"subject":"Evolved Policy Gradients","predicate":"matches data-business lanes","object":"Infrastructure, Safety and policy","text":"Evolved Policy Gradients matches data-business lanes Infrastructure, Safety and policy."},{"subject":"Evolved Policy Gradients","predicate":"has captured page count","object":"1","text":"Evolved Policy Gradients has captured page count 1."},{"subject":"Evolved Policy Gradients","predicate":"has readable page count","object":"1","text":"Evolved Policy Gradients has readable page count 1."},{"subject":"Evolved Policy Gradients","predicate":"has related signal count","object":"6","text":"Evolved Policy Gradients has related signal count 6."},{"subject":"Evolved Policy Gradients","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"Evolved Policy Gradients has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.."},{"subject":"Evolved Policy Gradients","predicate":"has source host","object":"openai.com","text":"Evolved Policy Gradients has source host openai.com."},{"subject":"Evolved Policy Gradients","predicate":"has lab","object":"OpenAI","text":"Evolved Policy Gradients has lab OpenAI."},{"subject":"Evolved Policy Gradients","predicate":"has signal desk","object":"talking","text":"Evolved Policy Gradients has signal desk talking."},{"subject":"Evolved Policy Gradients","predicate":"has source host","object":"openai.com","text":"Evolved Policy Gradients has source host openai.com."},{"subject":"Evolved Policy Gradients","predicate":"has radar lane","object":"Infrastructure","text":"Evolved Policy Gradients has radar lane Infrastructure."},{"subject":"Evolved Policy Gradients","predicate":"has radar lane","object":"Safety and policy","text":"Evolved Policy Gradients has radar lane Safety and policy."},{"subject":"Evolved Policy Gradients","predicate":"has matched term","object":"training","text":"Evolved Policy Gradients has matched term training."},{"subject":"Evolved Policy Gradients","predicate":"has matched term","object":"policy","text":"Evolved Policy Gradients has matched term policy."},{"subject":"Evolved Policy Gradients","predicate":"has watch term","object":"RL environments","text":"Evolved Policy Gradients has watch term RL environments."}]},"intelligence":{"signal_desk":"talking","answer":"OpenAI published Evolved Policy Gradients. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Evolved Policy Gradients | OpenAI April 18, 2018 Milestone Evolved Policy Gradients Read paper View code Loading… Share We’re releasing an experimental metalearning.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Infrastructure, Safety and policy in the data-business radar.","semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Evolved Policy Gradients","text":"OpenAI published Evolved Policy Gradients."},{"subject":"Evolved Policy Gradients","predicate":"is classified as","object":"writing signal","text":"Evolved Policy Gradients is classified as writing signal."},{"subject":"Evolved Policy Gradients","predicate":"belongs to","object":"talking desk","text":"Evolved Policy Gradients belongs to talking desk."},{"subject":"Evolved Policy Gradients","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Evolved Policy Gradients has evidence coverage 1 captured evidence page."},{"subject":"Evolved Policy Gradients","predicate":"matches data-business lanes","object":"Infrastructure, Safety and policy","text":"Evolved Policy Gradients matches data-business lanes Infrastructure, Safety and policy."}]},"signal":{"id":"11177376-2b0d-4817-b7d9-795e30252c6a","url":"https://onlylabs.fyi/signals/11177376-2b0d-4817-b7d9-795e30252c6a","json_url":"https://onlylabs.fyi/signals/11177376-2b0d-4817-b7d9-795e30252c6a/signal.json","source_url":"https://openai.com/index/evolved-policy-gradients","title":"Evolved Policy Gradients","summary":"OpenAI published a writing signal. onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2018-04-18T07:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/evolved-policy-gradients"]},"facets":{},"traction":{"github_stars":null,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure"},{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety"}],"score":25,"matched_terms":["training","policy"],"reason":"OpenAI has a writing signal matching infrastructure, safety and policy."}},"primary_evidence_page":{"url":"https://openai.com/index/evolved-policy-gradients","final_url":"https://openai.com/index/evolved-policy-gradients","title":"Evolved Policy Gradients","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:47:05.334+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Evolved Policy Gradients | OpenAI April 18, 2018 Milestone Evolved Policy Gradients Read paper View code Loading… Share We’re releasing an experimental metalearning approach called Evolved Policy Gradients, a method that evolves the loss function of learning agents, which can enable fast training on novel tasks. Agents trained with EPG can succeed at basic tasks at test time that were outside their training regime, like learning to navigate to an object on a different side of the room from where it was placed during training. EPG trains agents to have a prior notion of what constitutes making progress on a novel task. Rather than encoding prior knowledge through a learned policy network, EPG encodes it as a learned loss⁠ function⁠. Agents are then able to use this loss function, defined as a temporal-convolutional neural network, to learn quickly on a novel task. We’ve shown that EPG can generalize to out of distribution test time tasks, exhibiting behavior qualitatively different from other popular metalearning algorithms. In tests, we’ve also found that EPG can train agents faster than PPO⁠, an off-the-shelf policy gradient method. EPG is related to previous work on evolving⁠..."},"evidence_pages":[{"url":"https://openai.com/index/evolved-policy-gradients","final_url":"https://openai.com/index/evolved-policy-gradients","title":"Evolved Policy Gradients","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:47:05.334+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Evolved Policy Gradients | OpenAI April 18, 2018 Milestone Evolved Policy Gradients Read paper View code Loading… Share We’re releasing an experimental metalearning approach called Evolved Policy Gradients, a method that evolves the loss function of learning agents, which can enable fast training on novel tasks. Agents trained with EPG can succeed at basic tasks at test time that were outside their training regime, like learning to navigate to an object on a different side of the room from where it was placed during training. EPG trains agents to have a prior notion of what constitutes making progress on a novel task. Rather than encoding prior knowledge through a learned policy network, EPG encodes it as a learned loss⁠ function⁠. Agents are then able to use this loss function, defined as a temporal-convolutional neural network, to learn quickly on a novel task. We’ve shown that EPG can generalize to out of distribution test time tasks, exhibiting behavior qualitatively different from other popular metalearning algorithms. In tests, we’ve also found that EPG can train agents faster than PPO⁠, an off-the-shelf policy gradient method. EPG is related to previous work on evolving⁠..."}],"related_signals":[{"id":"b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","url":"https://onlylabs.fyi/signals/b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","source_url":"https://openai.com/index/supporting-eu-trustworthy-ai-ecosystem","title":"Supporting Europe’s work in ensuring a trustworthy AI ecosystem ","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T08:00:56.140796+00:00","date_source":"rss.item_date"},{"id":"2638c0a7-b372-409c-ac72-f6d81d6464dc","url":"https://onlylabs.fyi/signals/2638c0a7-b372-409c-ac72-f6d81d6464dc","source_url":"https://openai.com/index/using-codex-to-simulate-black-holes","title":"How an astrophysicist uses Codex to help simulate black holes","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"509ea784-51ec-4ede-855b-5a4d1b27d3be","url":"https://onlylabs.fyi/signals/509ea784-51ec-4ede-855b-5a4d1b27d3be","source_url":"https://openai.com/index/openai-on-oracle-cloud","title":"Access OpenAI models and Codex through your Oracle cloud commitment","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T20:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4f051449-87f2-466e-941e-b5918381a8fe","url":"https://onlylabs.fyi/signals/4f051449-87f2-466e-941e-b5918381a8fe","source_url":"https://openai.com/index/prc-linked-influence-operations-ai-debates","title":"PRC-linked influence operations are targeting AI debates in the US","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T12:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","url":"https://onlylabs.fyi/signals/4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","source_url":"https://openai.com/index/lseg","title":"From data to decisions: how LSEG is scaling trusted AI","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T00:00:00+00:00","first_seen_at":"2026-06-10T09:18:54.26094+00:00","date_source":"rss.item_date"},{"id":"fb16aa7a-c4ef-4859-b514-0839c2f1330d","url":"https://onlylabs.fyi/signals/fb16aa7a-c4ef-4859-b514-0839c2f1330d","source_url":"https://openai.com/index/nextdoor","title":"How engineers at Nextdoor use Codex to build without limits","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-09T12:00:00+00:00","first_seen_at":"2026-06-10T07:01:28.700378+00:00","date_source":"rss.item_date"}]}