{"schema_version":"onlylabs.public_signal.v1","title":"OpenAI Writing: How confessions can keep language models honest","description":"OpenAI writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/239ef419-8fa5-4449-beef-520b022f1fe2","json_url":"https://onlylabs.fyi/signals/239ef419-8fa5-4449-beef-520b022f1fe2/signal.json","generated_at":"2026-06-08T15:45:28.466+00:00","org":{"slug":"openai","name":"OpenAI","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/openai","dossier_json_url":"https://onlylabs.fyi/labs/openai/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/239ef419-8fa5-4449-beef-520b022f1fe2","signal_json":"https://onlylabs.fyi/signals/239ef419-8fa5-4449-beef-520b022f1fe2/signal.json","source":"https://openai.com/index/how-confessions-can-keep-language-models-honest","lab_dossier":"https://onlylabs.fyi/labs/openai","lab_dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis":"https://onlylabs.fyi/analysis/openai","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"},{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety","json_url":"https://onlylabs.fyi/data-radar/safety/signals.json"}]}},"answer_pack":{"answer":"OpenAI published How confessions can keep language models honest. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Research post, low traction · How confessions can keep language models honest | OpenAI December 3, 2025 How confessions can keep language models honest We’re sharing an early, proof-of-concept method.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Evals and quality, Safety and policy in the data-business radar.","signal_desk":"talking","source_context":{"source_url":"https://openai.com/index/how-confessions-can-keep-language-models-honest","source_host":"openai.com","occurred_at":"2025-12-03T10:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","context":null},"context_markers":[{"label":"Lab","value":"OpenAI","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"openai.com","source":"source"},{"label":"Notability","value":"Research post, low traction","source":"signal"},{"label":"Radar lane","value":"Evals and quality","source":"radar"},{"label":"Radar lane","value":"Safety and policy","source":"radar"},{"label":"Matched term","value":"testing","source":"radar"},{"label":"Matched term","value":"trust","source":"radar"},{"label":"Watch term","value":"RL environments","source":"evidence"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Safety and alignment","source":"evidence"},{"label":"Watch term","value":"Agents and tool use","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/how-confessions-can-keep-language-models-honest"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-08T15:45:28.466+00:00"},"data_business":{"matches":true,"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"},{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety","json_url":"https://onlylabs.fyi/data-radar/safety/signals.json"}],"matched_terms":["testing","trust"],"score":26,"reason":"OpenAI has a writing signal matching evals and quality, safety and policy."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/239ef419-8fa5-4449-beef-520b022f1fe2/signal.json","dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which data-business lane explains this signal: Evals and quality, Safety and policy?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/239ef419-8fa5-4449-beef-520b022f1fe2/signal.json","required":true},{"label":"source","url":"https://openai.com/index/how-confessions-can-keep-language-models-honest","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/openai/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/openai/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze OpenAI's writing signal \"How confessions can keep language models honest\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"How confessions can keep language models honest","text":"OpenAI published How confessions can keep language models honest."},{"subject":"How confessions can keep language models honest","predicate":"is classified as","object":"writing signal","text":"How confessions can keep language models honest is classified as writing signal."},{"subject":"How confessions can keep language models honest","predicate":"belongs to","object":"talking desk","text":"How confessions can keep language models honest belongs to talking desk."},{"subject":"How confessions can keep language models honest","predicate":"has evidence coverage","object":"1 captured evidence page","text":"How confessions can keep language models honest has evidence coverage 1 captured evidence page."},{"subject":"How confessions can keep language models honest","predicate":"matches data-business lanes","object":"Evals and quality, Safety and policy","text":"How confessions can keep language models honest matches data-business lanes Evals and quality, Safety and policy."},{"subject":"How confessions can keep language models honest","predicate":"has captured page count","object":"1","text":"How confessions can keep language models honest has captured page count 1."},{"subject":"How confessions can keep language models honest","predicate":"has readable page count","object":"1","text":"How confessions can keep language models honest has readable page count 1."},{"subject":"How confessions can keep language models honest","predicate":"has related signal count","object":"6","text":"How confessions can keep language models honest has related signal count 6."},{"subject":"How confessions can keep language models honest","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"How confessions can keep language models honest has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.."},{"subject":"How confessions can keep language models honest","predicate":"has source host","object":"openai.com","text":"How confessions can keep language models honest has source host openai.com."},{"subject":"How confessions can keep language models honest","predicate":"has lab","object":"OpenAI","text":"How confessions can keep language models honest has lab OpenAI."},{"subject":"How confessions can keep language models honest","predicate":"has signal desk","object":"talking","text":"How confessions can keep language models honest has signal desk talking."},{"subject":"How confessions can keep language models honest","predicate":"has source host","object":"openai.com","text":"How confessions can keep language models honest has source host openai.com."},{"subject":"How confessions can keep language models honest","predicate":"has notability","object":"Research post, low traction","text":"How confessions can keep language models honest has notability Research post, low traction."},{"subject":"How confessions can keep language models honest","predicate":"has radar lane","object":"Evals and quality","text":"How confessions can keep language models honest has radar lane Evals and quality."},{"subject":"How confessions can keep language models honest","predicate":"has radar lane","object":"Safety and policy","text":"How confessions can keep language models honest has radar lane Safety and policy."},{"subject":"How confessions can keep language models honest","predicate":"has matched term","object":"testing","text":"How confessions can keep language models honest has matched term testing."},{"subject":"How confessions can keep language models honest","predicate":"has matched term","object":"trust","text":"How confessions can keep language models honest has matched term trust."}]},"intelligence":{"signal_desk":"talking","answer":"OpenAI published How confessions can keep language models honest. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Research post, low traction · How confessions can keep language models honest | OpenAI December 3, 2025 How confessions can keep language models honest We’re sharing an early, proof-of-concept method.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Evals and quality, Safety and policy in the data-business radar.","semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"How confessions can keep language models honest","text":"OpenAI published How confessions can keep language models honest."},{"subject":"How confessions can keep language models honest","predicate":"is classified as","object":"writing signal","text":"How confessions can keep language models honest is classified as writing signal."},{"subject":"How confessions can keep language models honest","predicate":"belongs to","object":"talking desk","text":"How confessions can keep language models honest belongs to talking desk."},{"subject":"How confessions can keep language models honest","predicate":"has evidence coverage","object":"1 captured evidence page","text":"How confessions can keep language models honest has evidence coverage 1 captured evidence page."},{"subject":"How confessions can keep language models honest","predicate":"matches data-business lanes","object":"Evals and quality, Safety and policy","text":"How confessions can keep language models honest matches data-business lanes Evals and quality, Safety and policy."}]},"signal":{"id":"239ef419-8fa5-4449-beef-520b022f1fe2","url":"https://onlylabs.fyi/signals/239ef419-8fa5-4449-beef-520b022f1fe2","json_url":"https://onlylabs.fyi/signals/239ef419-8fa5-4449-beef-520b022f1fe2/signal.json","source_url":"https://openai.com/index/how-confessions-can-keep-language-models-honest","title":"How confessions can keep language models honest","summary":"OpenAI published a writing signal. onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2025-12-03T10:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/how-confessions-can-keep-language-models-honest"]},"facets":{},"traction":{"github_stars":null,"hn_points":5,"hn_comments":0,"hn_story_id":"46146179","hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals"},{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety"}],"score":26,"matched_terms":["testing","trust"],"reason":"OpenAI has a writing signal matching evals and quality, safety and policy."}},"primary_evidence_page":{"url":"https://openai.com/index/how-confessions-can-keep-language-models-honest","final_url":"https://openai.com/index/how-confessions-can-keep-language-models-honest","title":"How confessions can keep language models honest","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:45:28.466+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"How confessions can keep language models honest | OpenAI December 3, 2025 How confessions can keep language models honest We’re sharing an early, proof-of-concept method that trains models to report when they break instructions or take unintended shortcuts. Loading… Share AI systems are becoming more capable, and we want to understand them as deeply as possible—including how and why they arrive at an answer. Sometimes a model takes a shortcut or optimizes for the wrong objective, but its final output still looks correct. If we can surface when that happens, we can better monitor deployed systems, improve training, and increase trust in the outputs. Research by OpenAI and others has shown that AI models can hallucinate⁠, reward-hack, or be dishonest. At the moment, we see the most concerning misbehaviors, such as scheming⁠, only in stress-tests and adversarial evaluations. But as models become more capable and increasingly agentic, even rare forms of misalignment become more consequential, motivating us to invest in methods that help us better detect, understand, and mitigate these risks. This work explores one such approach: training models to explicitly admit when they engage in..."},"evidence_pages":[{"url":"https://openai.com/index/how-confessions-can-keep-language-models-honest","final_url":"https://openai.com/index/how-confessions-can-keep-language-models-honest","title":"How confessions can keep language models honest","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:45:28.466+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"How confessions can keep language models honest | OpenAI December 3, 2025 How confessions can keep language models honest We’re sharing an early, proof-of-concept method that trains models to report when they break instructions or take unintended shortcuts. Loading… Share AI systems are becoming more capable, and we want to understand them as deeply as possible—including how and why they arrive at an answer. Sometimes a model takes a shortcut or optimizes for the wrong objective, but its final output still looks correct. If we can surface when that happens, we can better monitor deployed systems, improve training, and increase trust in the outputs. Research by OpenAI and others has shown that AI models can hallucinate⁠, reward-hack, or be dishonest. At the moment, we see the most concerning misbehaviors, such as scheming⁠, only in stress-tests and adversarial evaluations. But as models become more capable and increasingly agentic, even rare forms of misalignment become more consequential, motivating us to invest in methods that help us better detect, understand, and mitigate these risks. This work explores one such approach: training models to explicitly admit when they engage in..."}],"related_signals":[{"id":"b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","url":"https://onlylabs.fyi/signals/b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","source_url":"https://openai.com/index/supporting-eu-trustworthy-ai-ecosystem","title":"Supporting Europe’s work in ensuring a trustworthy AI ecosystem ","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T08:00:56.140796+00:00","date_source":"rss.item_date"},{"id":"2638c0a7-b372-409c-ac72-f6d81d6464dc","url":"https://onlylabs.fyi/signals/2638c0a7-b372-409c-ac72-f6d81d6464dc","source_url":"https://openai.com/index/using-codex-to-simulate-black-holes","title":"How an astrophysicist uses Codex to help simulate black holes","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"509ea784-51ec-4ede-855b-5a4d1b27d3be","url":"https://onlylabs.fyi/signals/509ea784-51ec-4ede-855b-5a4d1b27d3be","source_url":"https://openai.com/index/openai-on-oracle-cloud","title":"Access OpenAI models and Codex through your Oracle cloud commitment","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T20:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4f051449-87f2-466e-941e-b5918381a8fe","url":"https://onlylabs.fyi/signals/4f051449-87f2-466e-941e-b5918381a8fe","source_url":"https://openai.com/index/prc-linked-influence-operations-ai-debates","title":"PRC-linked influence operations are targeting AI debates in the US","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T12:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","url":"https://onlylabs.fyi/signals/4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","source_url":"https://openai.com/index/lseg","title":"From data to decisions: how LSEG is scaling trusted AI","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T00:00:00+00:00","first_seen_at":"2026-06-10T09:18:54.26094+00:00","date_source":"rss.item_date"},{"id":"fb16aa7a-c4ef-4859-b514-0839c2f1330d","url":"https://onlylabs.fyi/signals/fb16aa7a-c4ef-4859-b514-0839c2f1330d","source_url":"https://openai.com/index/nextdoor","title":"How engineers at Nextdoor use Codex to build without limits","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-09T12:00:00+00:00","first_seen_at":"2026-06-10T07:01:28.700378+00:00","date_source":"rss.item_date"}]}