{"schema_version":"onlylabs.public_signal.v1","title":"OpenAI Writing: Evaluating chain-of-thought monitorability","description":"OpenAI writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/cd2842fa-ed8d-4e4f-b5cc-c1578acad190","json_url":"https://onlylabs.fyi/signals/cd2842fa-ed8d-4e4f-b5cc-c1578acad190/signal.json","generated_at":"2026-06-08T15:45:26.1+00:00","org":{"slug":"openai","name":"OpenAI","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/openai","dossier_json_url":"https://onlylabs.fyi/labs/openai/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/cd2842fa-ed8d-4e4f-b5cc-c1578acad190","signal_json":"https://onlylabs.fyi/signals/cd2842fa-ed8d-4e4f-b5cc-c1578acad190/signal.json","source":"https://openai.com/index/evaluating-chain-of-thought-monitorability","lab_dossier":"https://onlylabs.fyi/labs/openai","lab_dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis":"https://onlylabs.fyi/analysis/openai","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"evals","label":"Evals and 
quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"},{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"}]}},"answer_pack":{"answer":"OpenAI published Evaluating chain-of-thought monitorability. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: OpenAI research post with moderate traction · Evaluating chain-of-thought monitorability | OpenAI December 18, 2025 Evaluating chain-of-thought monitorability We introduce evaluations for chain-of-thought.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Evals and quality, Infrastructure in the data-business radar.","signal_desk":"talking","source_context":{"source_url":"https://openai.com/index/evaluating-chain-of-thought-monitorability","source_host":"openai.com","occurred_at":"2025-12-18T12:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","context":null},"context_markers":[{"label":"Lab","value":"OpenAI","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"openai.com","source":"source"},{"label":"Notability","value":"OpenAI research post with moderate traction","source":"signal"},{"label":"Radar lane","value":"Evals and quality","source":"radar"},{"label":"Radar lane","value":"Infrastructure","source":"radar"},{"label":"Matched term","value":"eval","source":"radar"},{"label":"Matched term","value":"evaluation","source":"radar"},{"label":"Matched term","value":"systems","source":"radar"},{"label":"Watch term","value":"RL environments","source":"evidence"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch 
term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Safety and alignment","source":"evidence"},{"label":"Watch term","value":"Agents and tool use","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/evaluating-chain-of-thought-monitorability"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-08T15:45:26.1+00:00"},"data_business":{"matches":true,"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"},{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"}],"matched_terms":["eval","evaluation","systems"],"score":29,"reason":"OpenAI has a writing signal matching evals and quality, infrastructure."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/cd2842fa-ed8d-4e4f-b5cc-c1578acad190/signal.json","dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper 
references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which data-business lane explains this signal: Evals and quality, Infrastructure?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/cd2842fa-ed8d-4e4f-b5cc-c1578acad190/signal.json","required":true},{"label":"source","url":"https://openai.com/index/evaluating-chain-of-thought-monitorability","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/openai/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/openai/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze OpenAI's writing signal \"Evaluating chain-of-thought monitorability\" for frontier lab strategy and data-business 
implications."},"semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Evaluating chain-of-thought monitorability","text":"OpenAI published Evaluating chain-of-thought monitorability."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"is classified as","object":"writing signal","text":"Evaluating chain-of-thought monitorability is classified as writing signal."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"belongs to","object":"talking desk","text":"Evaluating chain-of-thought monitorability belongs to talking desk."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Evaluating chain-of-thought monitorability has evidence coverage 1 captured evidence page."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"matches data-business lanes","object":"Evals and quality, Infrastructure","text":"Evaluating chain-of-thought monitorability matches data-business lanes Evals and quality, Infrastructure."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"has captured page count","object":"1","text":"Evaluating chain-of-thought monitorability has captured page count 1."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"has readable page count","object":"1","text":"Evaluating chain-of-thought monitorability has readable page count 1."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"has related signal count","object":"6","text":"Evaluating chain-of-thought monitorability has related signal count 6."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"Evaluating chain-of-thought monitorability has analysis playbook objective Turn public 
writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"has source host","object":"openai.com","text":"Evaluating chain-of-thought monitorability has source host openai.com."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"has lab","object":"OpenAI","text":"Evaluating chain-of-thought monitorability has lab OpenAI."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"has signal desk","object":"talking","text":"Evaluating chain-of-thought monitorability has signal desk talking."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"has source host","object":"openai.com","text":"Evaluating chain-of-thought monitorability has source host openai.com."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"has notability","object":"OpenAI research post with moderate traction","text":"Evaluating chain-of-thought monitorability has notability OpenAI research post with moderate traction."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"has radar lane","object":"Evals and quality","text":"Evaluating chain-of-thought monitorability has radar lane Evals and quality."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"has radar lane","object":"Infrastructure","text":"Evaluating chain-of-thought monitorability has radar lane Infrastructure."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"has matched term","object":"eval","text":"Evaluating chain-of-thought monitorability has matched term eval."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"has matched term","object":"evaluation","text":"Evaluating chain-of-thought monitorability has matched term evaluation."}]},"intelligence":{"signal_desk":"talking","answer":"OpenAI published Evaluating chain-of-thought monitorability. 
This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: OpenAI research post with moderate traction · Evaluating chain-of-thought monitorability | OpenAI December 18, 2025 Evaluating chain-of-thought monitorability We introduce evaluations for chain-of-thought.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Evals and quality, Infrastructure in the data-business radar.","semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Evaluating chain-of-thought monitorability","text":"OpenAI published Evaluating chain-of-thought monitorability."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"is classified as","object":"writing signal","text":"Evaluating chain-of-thought monitorability is classified as writing signal."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"belongs to","object":"talking desk","text":"Evaluating chain-of-thought monitorability belongs to talking desk."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Evaluating chain-of-thought monitorability has evidence coverage 1 captured evidence page."},{"subject":"Evaluating chain-of-thought monitorability","predicate":"matches data-business lanes","object":"Evals and quality, Infrastructure","text":"Evaluating chain-of-thought monitorability matches data-business lanes Evals and quality, Infrastructure."}]},"signal":{"id":"cd2842fa-ed8d-4e4f-b5cc-c1578acad190","url":"https://onlylabs.fyi/signals/cd2842fa-ed8d-4e4f-b5cc-c1578acad190","json_url":"https://onlylabs.fyi/signals/cd2842fa-ed8d-4e4f-b5cc-c1578acad190/signal.json","source_url":"https://openai.com/index/evaluating-chain-of-thought-monitorability","title":"Evaluating chain-of-thought monitorability","summary":"OpenAI published a writing signal. 
onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2025-12-18T12:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/evaluating-chain-of-thought-monitorability"]},"facets":{},"traction":{"github_stars":null,"hn_points":68,"hn_comments":21,"hn_story_id":"46322631","hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals"},{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure"}],"score":29,"matched_terms":["eval","evaluation","systems"],"reason":"OpenAI has a writing signal matching evals and quality, infrastructure."}},"primary_evidence_page":{"url":"https://openai.com/index/evaluating-chain-of-thought-monitorability","final_url":"https://openai.com/index/evaluating-chain-of-thought-monitorability","title":"Evaluating chain-of-thought monitorability","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:45:26.1+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Evaluating chain-of-thought monitorability | OpenAI December 18, 2025 Evaluating chain-of-thought monitorability We introduce evaluations for chain-of-thought monitorability and study how it scales with test-time compute, reinforcement learning, and pretraining. Loading… Share When AI systems make decisions that are difficult to supervise directly, it becomes important to understand how those decisions are made. 
One promising approach is to monitor a model’s internal reasoning, rather than only its actions or final outputs. Modern reasoning models, such as GPT‑5 Thinking, generate an explicit chain-of-thought before producing an answer. Monitoring these chains-of-thought for misbehavior can be far more effective⁠ than monitoring a model’s actions and outputs alone. However, researchers at OpenAI and across the broader industry worry⁠ that this chain-of-thought “monitorability” may be fragile to changes in training procedure, data sources, and even continued scaling of existing algorithms. We want chain-of-thought monitorability to hold up as models scale and are deployed in higher-stakes settings. We call on researchers across the industry to work to preserve chain-of-thought..."},"evidence_pages":[{"url":"https://openai.com/index/evaluating-chain-of-thought-monitorability","final_url":"https://openai.com/index/evaluating-chain-of-thought-monitorability","title":"Evaluating chain-of-thought monitorability","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:45:26.1+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Evaluating chain-of-thought monitorability | OpenAI December 18, 2025 Evaluating chain-of-thought monitorability We introduce evaluations for chain-of-thought monitorability and study how it scales with test-time compute, reinforcement learning, and pretraining. Loading… Share When AI systems make decisions that are difficult to supervise directly, it becomes important to understand how those decisions are made. One promising approach is to monitor a model’s internal reasoning, rather than only its actions or final outputs. Modern reasoning models, such as GPT‑5 Thinking, generate an explicit chain-of-thought before producing an answer. Monitoring these chains-of-thought for misbehavior can be far more effective⁠ than monitoring a model’s actions and outputs alone. 
However, researchers at OpenAI and across the broader industry worry⁠ that this chain-of-thought “monitorability” may be fragile to changes in training procedure, data sources, and even continued scaling of existing algorithms. We want chain-of-thought monitorability to hold up as models scale and are deployed in higher-stakes settings. We call on researchers across the industry to work to preserve chain-of-thought..."}],"related_signals":[{"id":"b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","url":"https://onlylabs.fyi/signals/b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","source_url":"https://openai.com/index/supporting-eu-trustworthy-ai-ecosystem","title":"Supporting Europe’s work in ensuring a trustworthy AI ecosystem ","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T08:00:56.140796+00:00","date_source":"rss.item_date"},{"id":"2638c0a7-b372-409c-ac72-f6d81d6464dc","url":"https://onlylabs.fyi/signals/2638c0a7-b372-409c-ac72-f6d81d6464dc","source_url":"https://openai.com/index/using-codex-to-simulate-black-holes","title":"How an astrophysicist uses Codex to help simulate black holes","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"509ea784-51ec-4ede-855b-5a4d1b27d3be","url":"https://onlylabs.fyi/signals/509ea784-51ec-4ede-855b-5a4d1b27d3be","source_url":"https://openai.com/index/openai-on-oracle-cloud","title":"Access OpenAI models and Codex through your Oracle cloud 
commitment","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T20:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4f051449-87f2-466e-941e-b5918381a8fe","url":"https://onlylabs.fyi/signals/4f051449-87f2-466e-941e-b5918381a8fe","source_url":"https://openai.com/index/prc-linked-influence-operations-ai-debates","title":"PRC-linked influence operations are targeting AI debates in the US","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T12:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","url":"https://onlylabs.fyi/signals/4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","source_url":"https://openai.com/index/lseg","title":"From data to decisions: how LSEG is scaling trusted AI","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T00:00:00+00:00","first_seen_at":"2026-06-10T09:18:54.26094+00:00","date_source":"rss.item_date"},{"id":"fb16aa7a-c4ef-4859-b514-0839c2f1330d","url":"https://onlylabs.fyi/signals/fb16aa7a-c4ef-4859-b514-0839c2f1330d","source_url":"https://openai.com/index/nextdoor","title":"How engineers at Nextdoor use Codex to build without limits","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-09T12:00:00+00:00","first_seen_at":"2026-06-10T07:01:28.700378+00:00","date_source":"rss.item_date"}]}