{"schema_version":"onlylabs.public_signal.v1","title":"OpenAI Repo: openai/simple-evals","description":"OpenAI repo signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/5ee10fa0-971f-4fbf-91c7-594e596c7d0a","json_url":"https://onlylabs.fyi/signals/5ee10fa0-971f-4fbf-91c7-594e596c7d0a/signal.json","generated_at":"2026-06-11T04:02:05.926956+00:00","org":{"slug":"openai","name":"OpenAI","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/openai","dossier_json_url":"https://onlylabs.fyi/labs/openai/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/5ee10fa0-971f-4fbf-91c7-594e596c7d0a","signal_json":"https://onlylabs.fyi/signals/5ee10fa0-971f-4fbf-91c7-594e596c7d0a/signal.json","source":"https://github.com/openai/simple-evals","lab_dossier":"https://onlylabs.fyi/labs/openai","lab_dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis":"https://onlylabs.fyi/analysis/openai","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":null,"topic_signals_json":null,"topic_feed":null,"data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}]}},"answer_pack":{"answer":"OpenAI published openai/simple-evals (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo openai/simple-evals · language Python. onlylabs links this event to 1 captured evidence page and 6 related repo signals. It also maps to Evals and quality in the data-business radar.","signal_desk":"repos","source_context":{"source_url":"https://github.com/openai/simple-evals","source_host":"github.com","occurred_at":"2024-04-11T22:38:17+00:00","first_seen_at":"2026-06-05T05:42:58.317915+00:00","date_source":"source","context":"Python"},"context_markers":[{"label":"Lab","value":"OpenAI","source":"signal"},{"label":"Signal desk","value":"repos","source":"signal"},{"label":"Source host","value":"github.com","source":"source"},{"label":"Repository","value":"openai/simple-evals","source":"source"},{"label":"Language","value":"Python","source":"source"},{"label":"Stars","value":"4,522","source":"traction"},{"label":"Radar lane","value":"Evals and quality","source":"radar"},{"label":"Matched term","value":"eval","source":"radar"},{"label":"Matched term","value":"evals","source":"radar"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Agents and tool use","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/openai/simple-evals"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-11T04:02:05.926956+00:00"},"data_business":{"matches":true,"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}],"matched_terms":["eval","evals"],"score":17,"reason":"OpenAI has a repo signal matching evals and quality."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/5ee10fa0-971f-4fbf-91c7-594e596c7d0a/signal.json","dossier_json":"https://onlylabs.fyi/labs/openai/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/openai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/openai/evidence.json","topic_signals_json":null,"topic_feed":null,"category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","evidence_focus":["repo name","owner","description","language","stars","source URL","first seen time","data, eval, infra, safety, and product terms"],"extraction_questions":["What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Is the repo new evidence for a lab direction that is not yet in writing or releases?","Which related signals should an analyst inspect next?"],"signal_questions":["What does this new repository reveal before a formal announcement exists?","What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Which data-business lane explains this signal: Evals and quality?","Do the 6 related repo signals show a repeated pattern?"],"output_fields":["org","repo","technical_theme","data_business_lane","evidence_url"],"data_business_relevance":"New repositories can expose organization build priorities early, especially around internal tooling, eval infrastructure, data systems, deployment, and agent workflows.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/5ee10fa0-971f-4fbf-91c7-594e596c7d0a/signal.json","required":true},{"label":"source","url":"https://github.com/openai/simple-evals","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/openai/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/openai/evidence.json","required":true},{"label":"topic_signals_json","url":null,"required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze OpenAI's repo signal \"openai/simple-evals\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"OpenAI","predicate":"published repo","object":"openai/simple-evals","text":"OpenAI published repo openai/simple-evals."},{"subject":"openai/simple-evals","predicate":"is classified as","object":"repo signal","text":"openai/simple-evals is classified as repo signal."},{"subject":"openai/simple-evals","predicate":"belongs to","object":"repos desk","text":"openai/simple-evals belongs to repos desk."},{"subject":"openai/simple-evals","predicate":"has context","object":"Python","text":"openai/simple-evals has context Python."},{"subject":"openai/simple-evals","predicate":"has evidence coverage","object":"1 captured evidence page","text":"openai/simple-evals has evidence coverage 1 captured evidence page."},{"subject":"openai/simple-evals","predicate":"matches data-business lanes","object":"Evals and quality","text":"openai/simple-evals matches data-business lanes Evals and quality."},{"subject":"openai/simple-evals","predicate":"has captured page count","object":"1","text":"openai/simple-evals has captured page count 1."},{"subject":"openai/simple-evals","predicate":"has readable page count","object":"1","text":"openai/simple-evals has readable page count 1."},{"subject":"openai/simple-evals","predicate":"has related signal count","object":"6","text":"openai/simple-evals has related signal count 6."},{"subject":"openai/simple-evals","predicate":"has analysis playbook objective","object":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","text":"openai/simple-evals has analysis playbook objective Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.."},{"subject":"openai/simple-evals","predicate":"has source host","object":"github.com","text":"openai/simple-evals has source host github.com."},{"subject":"openai/simple-evals","predicate":"has lab","object":"OpenAI","text":"openai/simple-evals has lab OpenAI."},{"subject":"openai/simple-evals","predicate":"has signal desk","object":"repos","text":"openai/simple-evals has signal desk repos."},{"subject":"openai/simple-evals","predicate":"has source host","object":"github.com","text":"openai/simple-evals has source host github.com."},{"subject":"openai/simple-evals","predicate":"has repository","object":"openai/simple-evals","text":"openai/simple-evals has repository openai/simple-evals."},{"subject":"openai/simple-evals","predicate":"has language","object":"Python","text":"openai/simple-evals has language Python."},{"subject":"openai/simple-evals","predicate":"has stars","object":"4,522","text":"openai/simple-evals has stars 4,522."},{"subject":"openai/simple-evals","predicate":"has radar lane","object":"Evals and quality","text":"openai/simple-evals has radar lane Evals and quality."},{"subject":"openai/simple-evals","predicate":"has matched term","object":"eval","text":"openai/simple-evals has matched term eval."}]},"intelligence":{"signal_desk":"repos","answer":"OpenAI published openai/simple-evals (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo openai/simple-evals · language Python. onlylabs links this event to 1 captured evidence page and 6 related repo signals. It also maps to Evals and quality in the data-business radar.","semantic_triples":[{"subject":"OpenAI","predicate":"published repo","object":"openai/simple-evals","text":"OpenAI published repo openai/simple-evals."},{"subject":"openai/simple-evals","predicate":"is classified as","object":"repo signal","text":"openai/simple-evals is classified as repo signal."},{"subject":"openai/simple-evals","predicate":"belongs to","object":"repos desk","text":"openai/simple-evals belongs to repos desk."},{"subject":"openai/simple-evals","predicate":"has context","object":"Python","text":"openai/simple-evals has context Python."},{"subject":"openai/simple-evals","predicate":"has evidence coverage","object":"1 captured evidence page","text":"openai/simple-evals has evidence coverage 1 captured evidence page."},{"subject":"openai/simple-evals","predicate":"matches data-business lanes","object":"Evals and quality","text":"openai/simple-evals matches data-business lanes Evals and quality."}]},"signal":{"id":"5ee10fa0-971f-4fbf-91c7-594e596c7d0a","url":"https://onlylabs.fyi/signals/5ee10fa0-971f-4fbf-91c7-594e596c7d0a","json_url":"https://onlylabs.fyi/signals/5ee10fa0-971f-4fbf-91c7-594e596c7d0a/signal.json","source_url":"https://github.com/openai/simple-evals","title":"openai/simple-evals","summary":"OpenAI published a new repository. onlylabs watches repos for tooling, eval, infra, and model-adjacent work.","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2024-04-11T22:38:17+00:00","first_seen_at":"2026-06-05T05:42:58.317915+00:00","date_source":"source","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/openai/simple-evals"]},"facets":{"repo":"openai/simple-evals","language":"Python"},"traction":{"github_stars":4522,"hn_points":3,"hn_comments":0,"hn_story_id":"40349583","hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals"}],"score":17,"matched_terms":["eval","evals"],"reason":"OpenAI has a repo signal matching evals and quality."}},"primary_evidence_page":{"url":"https://github.com/openai/simple-evals","final_url":"https://github.com/openai/simple-evals","title":"openai/simple-evals repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T04:02:05.926956+00:00","bytes":18118,"raw_path":"ad724b6d82e63f18da8451cc74791e532e64b03c9dddb8618da5f87978ea72e2.json","content_hash":"4ba83cdb67bdf602baf875fa8533d9f88764a4da143388d8940821213169464a","excerpt_chars":1200,"truncated":true,"excerpt":"openai/simple-evals Language: Python License: MIT Stars: 4521 Forks: 492 Open issues: 56 Created: 2024-04-11T22:38:17Z Pushed: 2026-04-22T22:16:18Z Default branch: main Fork: no Archived: no README: ⚠️ Deprecation Notice **July 2025**: `simple-evals` will no longer be updated for new models or benchmark results. The repo will continue to host reference implementations for **HealthBench**, **BrowseComp**, and **SimpleQA**. --- Overview This repository contains a lightweight library for evaluating language models. We are open sourcing it so we can be transparent about the accuracy numbers we're publishing alongside our latest models. Benchmark Results | Model | Prompt | MMLU | GPQA [^8] | MATH [^6]| HumanEval | MGSM[^5] | DROP[^5]<br>(F1, 3-shot) | SimpleQA |:----------------------------:|:-------------:|:------:|:------:|:--------:|:---------:|:------:|:--------------------------:|:---------:| | **o3** | | | | | | | | | | | o3-high [^10] | n/a [^7] | 93.3 | 83.4 | 98.1 | 88.4 | 92.0 | 89.8 | 48.6 | | o3 [^9] [^10] | n/a | 92.9 | 82.8 | 97.8 | 87.4 | 92.3 | 80.6 | 49.4 | | o3-low [^10] | n/a | 92.8 | 78.6 | 96.9 | 87.3 | 91.9 | 82.3 | 49.4 | | **o4-mini** | | | | | | | | | |..."},"evidence_pages":[{"url":"https://github.com/openai/simple-evals","final_url":"https://github.com/openai/simple-evals","title":"openai/simple-evals repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T04:02:05.926956+00:00","bytes":18118,"raw_path":"ad724b6d82e63f18da8451cc74791e532e64b03c9dddb8618da5f87978ea72e2.json","content_hash":"4ba83cdb67bdf602baf875fa8533d9f88764a4da143388d8940821213169464a","excerpt_chars":1200,"truncated":true,"excerpt":"openai/simple-evals Language: Python License: MIT Stars: 4521 Forks: 492 Open issues: 56 Created: 2024-04-11T22:38:17Z Pushed: 2026-04-22T22:16:18Z Default branch: main Fork: no Archived: no README: ⚠️ Deprecation Notice **July 2025**: `simple-evals` will no longer be updated for new models or benchmark results. The repo will continue to host reference implementations for **HealthBench**, **BrowseComp**, and **SimpleQA**. --- Overview This repository contains a lightweight library for evaluating language models. We are open sourcing it so we can be transparent about the accuracy numbers we're publishing alongside our latest models. Benchmark Results | Model | Prompt | MMLU | GPQA [^8] | MATH [^6]| HumanEval | MGSM[^5] | DROP[^5]<br>(F1, 3-shot) | SimpleQA |:----------------------------:|:-------------:|:------:|:------:|:--------:|:---------:|:------:|:--------------------------:|:---------:| | **o3** | | | | | | | | | | | o3-high [^10] | n/a [^7] | 93.3 | 83.4 | 98.1 | 88.4 | 92.0 | 89.8 | 48.6 | | o3 [^9] [^10] | n/a | 92.9 | 82.8 | 97.8 | 87.4 | 92.3 | 80.6 | 49.4 | | o3-low [^10] | n/a | 92.8 | 78.6 | 96.9 | 87.3 | 91.9 | 82.3 | 49.4 | | **o4-mini** | | | | | | | | | |..."}],"related_signals":[{"id":"9b55a747-c2bc-494f-a3ad-ef484503c650","url":"https://onlylabs.fyi/signals/9b55a747-c2bc-494f-a3ad-ef484503c650","source_url":"https://github.com/openai/role-specific-plugins","title":"openai/role-specific-plugins","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-06-02T16:27:24+00:00","first_seen_at":"2026-06-05T05:42:58.317915+00:00","date_source":"source"},{"id":"26c411d2-1675-4d85-a267-55c0a2a1b6b4","url":"https://onlylabs.fyi/signals/26c411d2-1675-4d85-a267-55c0a2a1b6b4","source_url":"https://github.com/openai/imagegencam","title":"openai/imagegencam","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-05-19T23:14:18+00:00","first_seen_at":"2026-06-05T05:42:58.317915+00:00","date_source":"source"},{"id":"e7a21543-9852-40bb-9102-041209e64fa5","url":"https://onlylabs.fyi/signals/e7a21543-9852-40bb-9102-041209e64fa5","source_url":"https://github.com/openai/openai-cli","title":"openai/openai-cli","context":"Go","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-05-01T16:40:05+00:00","first_seen_at":"2026-06-05T05:42:58.317915+00:00","date_source":"source"},{"id":"359b222c-96a1-40f5-b8d4-7a469ee9825c","url":"https://onlylabs.fyi/signals/359b222c-96a1-40f5-b8d4-7a469ee9825c","source_url":"https://github.com/openai/openai-realtime-meeting-assistant","title":"openai/openai-realtime-meeting-assistant","context":"Go","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-04-29T22:54:59+00:00","first_seen_at":"2026-06-05T05:42:58.317915+00:00","date_source":"source"},{"id":"1bf8025d-8227-489e-8e04-57d8919213fc","url":"https://onlylabs.fyi/signals/1bf8025d-8227-489e-8e04-57d8919213fc","source_url":"https://github.com/openai/monitorability-evals","title":"openai/monitorability-evals","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-04-22T22:12:39+00:00","first_seen_at":"2026-06-05T05:42:58.317915+00:00","date_source":"source"},{"id":"f61802fb-361e-4d2b-b871-57ea3a33fb22","url":"https://onlylabs.fyi/signals/f61802fb-361e-4d2b-b871-57ea3a33fb22","source_url":"https://github.com/openai/privacy-filter","title":"openai/privacy-filter","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"occurred_at":"2026-04-17T22:49:09+00:00","first_seen_at":"2026-06-05T05:42:58.317915+00:00","date_source":"source"}]}