{"schema_version":"onlylabs.public_signal.v1","title":"OpenAI Writing: Improving language model behavior by training on a curated dataset","description":"OpenAI writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/6be874fe-eedb-4d31-96d8-c1da6de7d390","json_url":"https://onlylabs.fyi/signals/6be874fe-eedb-4d31-96d8-c1da6de7d390/signal.json","generated_at":"2026-06-08T15:46:51.695+00:00","org":{"slug":"openai","name":"OpenAI","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/openai","dossier_json_url":"https://onlylabs.fyi/labs/openai/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/6be874fe-eedb-4d31-96d8-c1da6de7d390","signal_json":"https://onlylabs.fyi/signals/6be874fe-eedb-4d31-96d8-c1da6de7d390/signal.json","source":"https://openai.com/index/improving-language-model-behavior","lab_dossier":"https://onlylabs.fyi/labs/openai","lab_dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis":"https://onlylabs.fyi/analysis/openai","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"data","label":"Data demand","url":"https://onlylabs.fyi/data-radar/data","json_url":"https://onlylabs.fyi/data-radar/data/signals.json"},{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"}]}},"answer_pack":{"answer":"OpenAI published Improving language model behavior by training on a curated dataset. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Improving language model behavior by training on a curated dataset | OpenAI June 10, 2021 Improving language model behavior by training on a curated dataset Our latest.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Data demand, Infrastructure in the data-business radar.","signal_desk":"talking","source_context":{"source_url":"https://openai.com/index/improving-language-model-behavior","source_host":"openai.com","occurred_at":"2021-06-10T07:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","context":null},"context_markers":[{"label":"Lab","value":"OpenAI","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"openai.com","source":"source"},{"label":"Radar lane","value":"Data demand","source":"radar"},{"label":"Radar lane","value":"Infrastructure","source":"radar"},{"label":"Matched term","value":"data","source":"radar"},{"label":"Matched term","value":"dataset","source":"radar"},{"label":"Matched term","value":"training","source":"radar"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Safety and alignment","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/improving-language-model-behavior"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-08T15:46:51.695+00:00"},"data_business":{"matches":true,"lanes":[{"key":"data","label":"Data demand","url":"https://onlylabs.fyi/data-radar/data","json_url":"https://onlylabs.fyi/data-radar/data/signals.json"},{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"}],"matched_terms":["data","dataset","training"],"score":27,"reason":"OpenAI has a writing signal matching data demand, infrastructure."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/6be874fe-eedb-4d31-96d8-c1da6de7d390/signal.json","dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which data-business lane explains this signal: Data demand, Infrastructure?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/6be874fe-eedb-4d31-96d8-c1da6de7d390/signal.json","required":true},{"label":"source","url":"https://openai.com/index/improving-language-model-behavior","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/openai/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/openai/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze OpenAI's writing signal \"Improving language model behavior by training on a curated dataset\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Improving language model behavior by training on a curated dataset","text":"OpenAI published Improving language model behavior by training on a curated dataset."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"is classified as","object":"writing signal","text":"Improving language model behavior by training on a curated dataset is classified as writing signal."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"belongs to","object":"talking desk","text":"Improving language model behavior by training on a curated dataset belongs to talking desk."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Improving language model behavior by training on a curated dataset has evidence coverage 1 captured evidence page."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"matches data-business lanes","object":"Data demand, Infrastructure","text":"Improving language model behavior by training on a curated dataset matches data-business lanes Data demand, Infrastructure."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"has captured page count","object":"1","text":"Improving language model behavior by training on a curated dataset has captured page count 1."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"has readable page count","object":"1","text":"Improving language model behavior by training on a curated dataset has readable page count 1."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"has related signal count","object":"6","text":"Improving language model behavior by training on a curated dataset has related signal count 6."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"Improving language model behavior by training on a curated dataset has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"has source host","object":"openai.com","text":"Improving language model behavior by training on a curated dataset has source host openai.com."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"has lab","object":"OpenAI","text":"Improving language model behavior by training on a curated dataset has lab OpenAI."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"has signal desk","object":"talking","text":"Improving language model behavior by training on a curated dataset has signal desk talking."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"has source host","object":"openai.com","text":"Improving language model behavior by training on a curated dataset has source host openai.com."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"has radar lane","object":"Data demand","text":"Improving language model behavior by training on a curated dataset has radar lane Data demand."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"has radar lane","object":"Infrastructure","text":"Improving language model behavior by training on a curated dataset has radar lane Infrastructure."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"has matched term","object":"data","text":"Improving language model behavior by training on a curated dataset has matched term data."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"has matched term","object":"dataset","text":"Improving language model behavior by training on a curated dataset has matched term dataset."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"has matched term","object":"training","text":"Improving language model behavior by training on a curated dataset has matched term training."}]},"intelligence":{"signal_desk":"talking","answer":"OpenAI published Improving language model behavior by training on a curated dataset. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Improving language model behavior by training on a curated dataset | OpenAI June 10, 2021 Improving language model behavior by training on a curated dataset Our latest.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Data demand, Infrastructure in the data-business radar.","semantic_triples":[{"subject":"OpenAI","predicate":"published","object":"Improving language model behavior by training on a curated dataset","text":"OpenAI published Improving language model behavior by training on a curated dataset."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"is classified as","object":"writing signal","text":"Improving language model behavior by training on a curated dataset is classified as writing signal."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"belongs to","object":"talking desk","text":"Improving language model behavior by training on a curated dataset belongs to talking desk."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Improving language model behavior by training on a curated dataset has evidence coverage 1 captured evidence page."},{"subject":"Improving language model behavior by training on a curated dataset","predicate":"matches data-business lanes","object":"Data demand, Infrastructure","text":"Improving language model behavior by training on a curated dataset matches data-business lanes Data demand, Infrastructure."}]},"signal":{"id":"6be874fe-eedb-4d31-96d8-c1da6de7d390","url":"https://onlylabs.fyi/signals/6be874fe-eedb-4d31-96d8-c1da6de7d390","json_url":"https://onlylabs.fyi/signals/6be874fe-eedb-4d31-96d8-c1da6de7d390/signal.json","source_url":"https://openai.com/index/improving-language-model-behavior","title":"Improving language model behavior by training on a curated dataset","summary":"OpenAI published a writing signal. onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2021-06-10T07:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["exa"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://openai.com/index/improving-language-model-behavior"]},"facets":{},"traction":{"github_stars":null,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"data","label":"Data demand","url":"https://onlylabs.fyi/data-radar/data"},{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure"}],"score":27,"matched_terms":["data","dataset","training"],"reason":"OpenAI has a writing signal matching data demand, infrastructure."}},"primary_evidence_page":{"url":"https://openai.com/index/improving-language-model-behavior","final_url":"https://openai.com/index/improving-language-model-behavior","title":"Improving language model behavior by training on a curated dataset","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:46:51.695+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Improving language model behavior by training on a curated dataset | OpenAI June 10, 2021 Improving language model behavior by training on a curated dataset Our latest research finds we can improve language model behavior with respect to specific behavioral values by fine-tuning on a small, curated dataset. Loading… Share We’ve found we can improve language model behavior with respect to specific behavioral values by fine-tuning on a curated dataset of <100 examples of those values. We also found that this process becomes more effective as models get larger. While the technique is still nascent, we’re looking for OpenAI API users who would like to try it out and are excited to find ways to use these and other techniques in production use cases. Language models can output almost any kind of text, in any kind of tone or personality, depending on the user’s input. Our approach aims to give language model operators the tools to narrow this universal set of behaviors to a constrained set of values. While OpenAI provides guardrails and monitoring to ensure that model use-cases are compatible with our Charter⁠, we view selecting the exact set of Charter-compatible values for the model as..."},"evidence_pages":[{"url":"https://openai.com/index/improving-language-model-behavior","final_url":"https://openai.com/index/improving-language-model-behavior","title":"Improving language model behavior by training on a curated dataset","http_status":200,"content_type":null,"capture_method":"exa","fetched_at":"2026-06-08T15:46:51.695+00:00","bytes":null,"raw_path":null,"content_hash":null,"excerpt_chars":1200,"truncated":true,"excerpt":"Improving language model behavior by training on a curated dataset | OpenAI June 10, 2021 Improving language model behavior by training on a curated dataset Our latest research finds we can improve language model behavior with respect to specific behavioral values by fine-tuning on a small, curated dataset. Loading… Share We’ve found we can improve language model behavior with respect to specific behavioral values by fine-tuning on a curated dataset of <100 examples of those values. We also found that this process becomes more effective as models get larger. While the technique is still nascent, we’re looking for OpenAI API users who would like to try it out and are excited to find ways to use these and other techniques in production use cases. Language models can output almost any kind of text, in any kind of tone or personality, depending on the user’s input. Our approach aims to give language model operators the tools to narrow this universal set of behaviors to a constrained set of values. While OpenAI provides guardrails and monitoring to ensure that model use-cases are compatible with our Charter⁠, we view selecting the exact set of Charter-compatible values for the model as..."}],"related_signals":[{"id":"b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","url":"https://onlylabs.fyi/signals/b3668d3b-26d2-40c0-9d4f-ed1a67927aa4","source_url":"https://openai.com/index/supporting-eu-trustworthy-ai-ecosystem","title":"Supporting Europe’s work in ensuring a trustworthy AI ecosystem ","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T08:00:56.140796+00:00","date_source":"rss.item_date"},{"id":"2638c0a7-b372-409c-ac72-f6d81d6464dc","url":"https://onlylabs.fyi/signals/2638c0a7-b372-409c-ac72-f6d81d6464dc","source_url":"https://openai.com/index/using-codex-to-simulate-black-holes","title":"How an astrophysicist uses Codex to help simulate black holes","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-11T00:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"509ea784-51ec-4ede-855b-5a4d1b27d3be","url":"https://onlylabs.fyi/signals/509ea784-51ec-4ede-855b-5a4d1b27d3be","source_url":"https://openai.com/index/openai-on-oracle-cloud","title":"Access OpenAI models and Codex through your Oracle cloud commitment","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T20:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4f051449-87f2-466e-941e-b5918381a8fe","url":"https://onlylabs.fyi/signals/4f051449-87f2-466e-941e-b5918381a8fe","source_url":"https://openai.com/index/prc-linked-influence-operations-ai-debates","title":"PRC-linked influence operations are targeting AI debates in the US","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T12:00:00+00:00","first_seen_at":"2026-06-11T07:01:16.936464+00:00","date_source":"rss.item_date"},{"id":"4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","url":"https://onlylabs.fyi/signals/4507c0c1-cb74-4bb3-b62b-5f6c2d37e20d","source_url":"https://openai.com/index/lseg","title":"From data to decisions: how LSEG is scaling trusted AI","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-10T00:00:00+00:00","first_seen_at":"2026-06-10T09:18:54.26094+00:00","date_source":"rss.item_date"},{"id":"fb16aa7a-c4ef-4859-b514-0839c2f1330d","url":"https://onlylabs.fyi/signals/fb16aa7a-c4ef-4859-b514-0839c2f1330d","source_url":"https://openai.com/index/nextdoor","title":"How engineers at Nextdoor use Codex to build without limits","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-09T12:00:00+00:00","first_seen_at":"2026-06-10T07:01:28.700378+00:00","date_source":"rss.item_date"}]}