{"schema_version":"onlylabs.data_radar_lane.v1","title":"onlylabs evals and quality radar","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json","generated_at":"2026-06-11T18:09:16.834Z","lane":{"key":"evals","label":"Evals and quality","short_label":"Evals","question":"Where are frontier labs exposing eval, benchmark, quality, red-team, observability, or model behavior testing demand?","description":"Signals around evaluation, benchmarks, red teaming, measurement, quality, observability, and model behavior testing.","terms":["eval","evals","evaluation","benchmark","benchmarks","quality","measurement","observability","red team","testing"],"count":14,"top_orgs":[{"slug":"nvidia","name":"NVIDIA","count":5},{"slug":"microsoft","name":"Microsoft","count":4},{"slug":"amazon","name":"Amazon (Nova)","count":1},{"slug":"anthropic","name":"Anthropic","count":1},{"slug":"cohere","name":"Cohere","count":1}]},"signals":[{"id":"2989aad1-5153-4872-8671-8e728dddbf89","url":"https://onlylabs.fyi/signals/2989aad1-5153-4872-8671-8e728dddbf89","json_url":"https://onlylabs.fyi/signals/2989aad1-5153-4872-8671-8e728dddbf89/signal.json","source_url":"https://github.com/NVIDIA/paidf-augmentation","title":"NVIDIA/paidf-augmentation","org":{"slug":"nvidia","name":"NVIDIA","category":"frontier-lab"},"kind":{"key":"repo_new","label":"Repo"},"occurred_at":"2026-05-19T00:35:07+00:00","first_seen_at":"2026-06-05T20:58:43.255815+00:00","date_source":"source","context":"Python","lanes":["data","evals","infrastructure"],"score":44,"matched_terms":["data","dataset","datasets","eval","evaluation","training"],"reason":"NVIDIA has a repo signal matching data demand, evals and quality, infrastructure."},{"id":"03f718aa-576a-42ec-b0a2-740a7aa053e2","url":"https://onlylabs.fyi/signals/03f718aa-576a-42ec-b0a2-740a7aa053e2","json_url":"https://onlylabs.fyi/signals/03f718aa-576a-42ec-b0a2-740a7aa053e2/signal.json","source_url":"https://openai.com/index/openai-frontier-models-and-codex-are-now-available-on-aws","title":"OpenAI frontier models and Codex are now available on AWS","org":{"slug":"openai","name":"OpenAI","category":"frontier-lab"},"kind":{"key":"post_published","label":"Writing"},"occurred_at":"2026-06-01T10:00:00+00:00","first_seen_at":"2026-06-05T05:42:57.832854+00:00","date_source":"rss.item_date","context":null,"lanes":["evals","product"],"score":36,"matched_terms":["eval","evaluation","product","customer","enterprise"],"reason":"OpenAI has a writing signal matching evals and quality, product and customer."},{"id":"3ebeebe9-57ba-4ce6-93f8-bf02cce6e96d","url":"https://onlylabs.fyi/signals/3ebeebe9-57ba-4ce6-93f8-bf02cce6e96d","json_url":"https://onlylabs.fyi/signals/3ebeebe9-57ba-4ce6-93f8-bf02cce6e96d/signal.json","source_url":"https://github.com/microsoft/RHELM","title":"microsoft/RHELM","org":{"slug":"microsoft","name":"Microsoft","category":"frontier-lab"},"kind":{"key":"repo_new","label":"Repo"},"occurred_at":"2026-06-01T04:59:56+00:00","first_seen_at":"2026-06-05T20:58:41.438741+00:00","date_source":"source","context":"HTML","lanes":["evals","infrastructure"],"score":30,"matched_terms":["eval","benchmark","benchmarks","systems"],"reason":"Microsoft has a repo signal matching evals and quality, infrastructure."},{"id":"1745a0a9-a045-456c-a1f7-f4123168fe17","url":"https://onlylabs.fyi/signals/1745a0a9-a045-456c-a1f7-f4123168fe17","json_url":"https://onlylabs.fyi/signals/1745a0a9-a045-456c-a1f7-f4123168fe17/signal.json","source_url":"https://www.amazon.science/blog/ground-truth-is-a-process-not-a-dataset","title":"Ground truth is a process, not a dataset","org":{"slug":"amazon","name":"Amazon (Nova)","category":"frontier-lab"},"kind":{"key":"post_published","label":"Writing"},"occurred_at":"2026-06-03T15:56:57+00:00","first_seen_at":"2026-06-05T20:58:33.860301+00:00","date_source":"rss.item_date","context":null,"lanes":["data","evals"],"score":27,"matched_terms":["data","dataset","benchmark"],"reason":"Amazon (Nova) has a writing signal matching data demand, evals and quality."},{"id":"457f4596-92f6-49ad-9faa-cd3a193cd7f6","url":"https://onlylabs.fyi/signals/457f4596-92f6-49ad-9faa-cd3a193cd7f6","json_url":"https://onlylabs.fyi/signals/457f4596-92f6-49ad-9faa-cd3a193cd7f6/signal.json","source_url":"https://blogs.nvidia.com/blog/cvpr-physical-ai-research-agent-skills/","title":"NVIDIA Enables the Next Era Of Physical AI Research With Agent Skills For Autonomous Vehicles, Robotics And Vision AI","org":{"slug":"nvidia","name":"NVIDIA","category":"frontier-lab"},"kind":{"key":"post_published","label":"Writing"},"occurred_at":"2026-06-03T15:00:35+00:00","first_seen_at":"2026-06-05T20:58:40.457541+00:00","date_source":"rss.item_date","context":null,"lanes":["evals","infrastructure"],"score":27,"matched_terms":["eval","systems","training"],"reason":"NVIDIA has a writing signal matching evals and quality, infrastructure."},{"id":"9c2bf9a4-d1d1-40d5-82e6-f8f753666e51","url":"https://onlylabs.fyi/signals/9c2bf9a4-d1d1-40d5-82e6-f8f753666e51","json_url":"https://onlylabs.fyi/signals/9c2bf9a4-d1d1-40d5-82e6-f8f753666e51/signal.json","source_url":"https://github.com/NVIDIA/srt-slurm-recipes","title":"NVIDIA/srt-slurm-recipes","org":{"slug":"nvidia","name":"NVIDIA","category":"frontier-lab"},"kind":{"key":"repo_new","label":"Repo"},"occurred_at":"2026-06-06T16:27:18+00:00","first_seen_at":"2026-06-09T07:00:17.261926+00:00","date_source":"source","context":"Python","lanes":["evals","infrastructure"],"score":26,"matched_terms":["benchmark","gpu"],"reason":"NVIDIA has a repo signal matching evals and quality, infrastructure."},{"id":"83fc496c-00ff-4dd1-805d-9fa04ad08c1c","url":"https://onlylabs.fyi/signals/83fc496c-00ff-4dd1-805d-9fa04ad08c1c","json_url":"https://onlylabs.fyi/signals/83fc496c-00ff-4dd1-805d-9fa04ad08c1c/signal.json","source_url":"https://blogs.nvidia.com/blog/factory-operations-fox-blueprint-ai-brain/","title":"NVIDIA Factory Operations Blueprint Gives Factories a New AI Brain","org":{"slug":"nvidia","name":"NVIDIA","category":"frontier-lab"},"kind":{"key":"post_published","label":"Writing"},"occurred_at":"2026-06-01T05:00:47+00:00","first_seen_at":"2026-06-05T20:58:40.457541+00:00","date_source":"rss.item_date","context":null,"lanes":["evals","infrastructure"],"score":25,"matched_terms":["quality","systems"],"reason":"NVIDIA has a writing signal matching evals and quality, infrastructure."},{"id":"8d4dafc7-cdc7-400d-919f-0ab23460cab9","url":"https://onlylabs.fyi/signals/8d4dafc7-cdc7-400d-919f-0ab23460cab9","json_url":"https://onlylabs.fyi/signals/8d4dafc7-cdc7-400d-919f-0ab23460cab9/signal.json","source_url":"https://github.com/microsoft/fabric-spark-benchmarks","title":"microsoft/fabric-spark-benchmarks","org":{"slug":"microsoft","name":"Microsoft","category":"frontier-lab"},"kind":{"key":"repo_new","label":"Repo"},"occurred_at":"2026-05-26T21:48:47+00:00","first_seen_at":"2026-06-05T20:58:41.438741+00:00","date_source":"source","context":"Python","lanes":["evals"],"score":16,"matched_terms":["benchmark","benchmarks"],"reason":"Microsoft has a repo signal matching evals and quality."},{"id":"f745fc6c-ee86-41dd-af47-e8434380f458","url":"https://onlylabs.fyi/signals/f745fc6c-ee86-41dd-af47-e8434380f458","json_url":"https://onlylabs.fyi/signals/f745fc6c-ee86-41dd-af47-e8434380f458/signal.json","source_url":"https://job-boards.greenhouse.io/xai/jobs/4803905007","title":"Member of Technical Staff - Observability","org":{"slug":"xai","name":"xAI","category":"frontier-lab"},"kind":{"key":"job_opened","label":"Job"},"occurred_at":"2026-06-10T16:49:17+00:00","first_seen_at":"2026-06-05T05:42:58.637283+00:00","date_source":"greenhouse.updated_at","context":"Palo Alto, CA","lanes":["evals"],"score":15,"matched_terms":["observability"],"reason":"xAI has a job signal matching evals and quality."},{"id":"edf95d36-e150-41df-8bf5-8b81c9888584","url":"https://onlylabs.fyi/signals/edf95d36-e150-41df-8bf5-8b81c9888584","json_url":"https://onlylabs.fyi/signals/edf95d36-e150-41df-8bf5-8b81c9888584/signal.json","source_url":"https://github.com/microsoft/azure-healthcare-digital-quality-cql-sdk","title":"microsoft/azure-healthcare-digital-quality-cql-sdk","org":{"slug":"microsoft","name":"Microsoft","category":"frontier-lab"},"kind":{"key":"repo_new","label":"Repo"},"occurred_at":"2026-06-01T15:24:48+00:00","first_seen_at":"2026-06-05T20:58:41.438741+00:00","date_source":"source","context":"Python","lanes":["evals"],"score":14,"matched_terms":["quality"],"reason":"Microsoft has a repo signal matching evals and quality."},{"id":"cdb12cfe-a2b3-4f47-9338-dbd102ad184a","url":"https://onlylabs.fyi/signals/cdb12cfe-a2b3-4f47-9338-dbd102ad184a","json_url":"https://onlylabs.fyi/signals/cdb12cfe-a2b3-4f47-9338-dbd102ad184a/signal.json","source_url":"https://github.com/microsoft/FrontierWeekHack","title":"microsoft/FrontierWeekHack","org":{"slug":"microsoft","name":"Microsoft","category":"frontier-lab"},"kind":{"key":"repo_new","label":"Repo"},"occurred_at":"2026-05-19T11:05:53+00:00","first_seen_at":"2026-06-05T20:58:41.438741+00:00","date_source":"source","context":"Python","lanes":["evals"],"score":14,"matched_terms":["eval"],"reason":"Microsoft has a repo signal matching evals and quality."},{"id":"dd281269-7073-459b-93bc-8f321ac0e621","url":"https://onlylabs.fyi/signals/dd281269-7073-459b-93bc-8f321ac0e621","json_url":"https://onlylabs.fyi/signals/dd281269-7073-459b-93bc-8f321ac0e621/signal.json","source_url":"https://github.com/NVIDIA/cell-eval","title":"NVIDIA/cell-eval","org":{"slug":"nvidia","name":"NVIDIA","category":"frontier-lab"},"kind":{"key":"repo_forked","label":"Fork"},"occurred_at":"2026-05-01T18:39:11+00:00","first_seen_at":"2026-06-05T20:58:43.255815+00:00","date_source":"source","context":"forked from ArcInstitute/cell-eval","lanes":["evals"],"score":14,"matched_terms":["eval"],"reason":"NVIDIA has a fork signal matching evals and quality."},{"id":"9010c0f4-73b1-4cde-a808-dd84cb4d30ea","url":"https://onlylabs.fyi/signals/9010c0f4-73b1-4cde-a808-dd84cb4d30ea","json_url":"https://onlylabs.fyi/signals/9010c0f4-73b1-4cde-a808-dd84cb4d30ea/signal.json","source_url":"https://github.com/anthropics/nix-eval-jobs","title":"anthropics/nix-eval-jobs","org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"kind":{"key":"repo_forked","label":"Fork"},"occurred_at":"2026-01-26T14:26:13+00:00","first_seen_at":"2026-06-05T05:42:58.841369+00:00","date_source":"source","context":"forked from NixOS/nix-eval-jobs","lanes":["evals"],"score":14,"matched_terms":["eval"],"reason":"Anthropic has a fork signal matching evals and quality."},{"id":"4158b52c-2e66-4cb1-9ccb-5b756218cdd1","url":"https://onlylabs.fyi/signals/4158b52c-2e66-4cb1-9ccb-5b756218cdd1","json_url":"https://onlylabs.fyi/signals/4158b52c-2e66-4cb1-9ccb-5b756218cdd1/signal.json","source_url":"https://github.com/cohere-ai/terraform-oci-cis-landing-zone-observability","title":"cohere-ai/terraform-oci-cis-landing-zone-observability","org":{"slug":"cohere","name":"Cohere","category":"frontier-lab"},"kind":{"key":"repo_forked","label":"Fork"},"occurred_at":"2024-05-31T18:40:16+00:00","first_seen_at":"2026-06-05T05:43:00.174979+00:00","date_source":"source","context":"forked from oci-landing-zones/terraform-oci-modules-observability","lanes":["evals"],"score":14,"matched_terms":["observability"],"reason":"Cohere has a fork signal matching evals and quality."}]}