{"schema_version":"onlylabs.public_signal.v1","title":"Anthropic Repo: anthropics/evals","description":"Anthropic repo signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/02948669-b2f1-46f6-b3ee-5c5f19245c69","json_url":"https://onlylabs.fyi/signals/02948669-b2f1-46f6-b3ee-5c5f19245c69/signal.json","generated_at":"2026-06-11T03:59:55.6175+00:00","org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/anthropic","dossier_json_url":"https://onlylabs.fyi/labs/anthropic/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/02948669-b2f1-46f6-b3ee-5c5f19245c69","signal_json":"https://onlylabs.fyi/signals/02948669-b2f1-46f6-b3ee-5c5f19245c69/signal.json","source":"https://github.com/anthropics/evals","lab_dossier":"https://onlylabs.fyi/labs/anthropic","lab_dossier_json":"https://onlylabs.fyi/labs/anthropic/dossier.json","analysis":"https://onlylabs.fyi/analysis/anthropic","analysis_json":"https://onlylabs.fyi/analysis/anthropic/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/anthropic/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":null,"topic_signals_json":null,"topic_feed":null,"data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}]}},"answer_pack":{"answer":"Anthropic published anthropics/evals. This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo anthropics/evals · anthropics/evals License: CC-BY-4.0 Stars: 395 Forks: 46 Open issues: 1 Created: 2022-12-12T22:41:18Z Pushed: 2024-07-02T07:36:39Z Default branch: main Fork: no.... onlylabs links this event to 1 captured evidence page and 6 related repo signals. It also maps to Evals and quality in the data-business radar.","signal_desk":"repos","source_context":{"source_url":"https://github.com/anthropics/evals","source_host":"github.com","occurred_at":"2022-12-12T22:41:18+00:00","first_seen_at":"2026-06-05T05:42:58.841369+00:00","date_source":"source","context":null},"context_markers":[{"label":"Lab","value":"Anthropic","source":"signal"},{"label":"Signal desk","value":"repos","source":"signal"},{"label":"Source host","value":"github.com","source":"source"},{"label":"Repository","value":"anthropics/evals","source":"source"},{"label":"Stars","value":"395","source":"traction"},{"label":"Radar lane","value":"Evals and quality","source":"radar"},{"label":"Matched term","value":"eval","source":"radar"},{"label":"Matched term","value":"evals","source":"radar"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Safety and alignment","source":"evidence"},{"label":"Watch term","value":"Agents and tool use","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/anthropics/evals"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-11T03:59:55.6175+00:00"},"data_business":{"matches":true,"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}],"matched_terms":["eval","evals"],"score":16,"reason":"Anthropic has a repo signal matching evals and quality."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/02948669-b2f1-46f6-b3ee-5c5f19245c69/signal.json","dossier_json":"https://onlylabs.fyi/labs/anthropic/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/anthropic/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/anthropic/evidence.json","topic_signals_json":null,"topic_feed":null,"category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","evidence_focus":["repo name","owner","description","language","stars","source URL","first seen time","data, eval, infra, safety, and product terms"],"extraction_questions":["What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Is the repo new evidence for a lab direction that is not yet in writing or releases?","Which related signals should an analyst inspect next?"],"signal_questions":["What does this new repository reveal before a formal announcement exists?","What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Which data-business lane explains this signal: Evals and quality?","Do the 6 related repo signals show a repeated pattern?"],"output_fields":["org","repo","technical_theme","data_business_lane","evidence_url"],"data_business_relevance":"New repositories can expose organization build priorities early, especially around internal tooling, eval infrastructure, data systems, deployment, and agent workflows.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/02948669-b2f1-46f6-b3ee-5c5f19245c69/signal.json","required":true},{"label":"source","url":"https://github.com/anthropics/evals","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/anthropic/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/anthropic/evidence.json","required":true},{"label":"topic_signals_json","url":null,"required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze Anthropic's repo signal \"anthropics/evals\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"Anthropic","predicate":"published repo","object":"anthropics/evals","text":"Anthropic published repo anthropics/evals."},{"subject":"anthropics/evals","predicate":"is classified as","object":"repo signal","text":"anthropics/evals is classified as repo signal."},{"subject":"anthropics/evals","predicate":"belongs to","object":"repos desk","text":"anthropics/evals belongs to repos desk."},{"subject":"anthropics/evals","predicate":"has evidence coverage","object":"1 captured evidence page","text":"anthropics/evals has evidence coverage 1 captured evidence page."},{"subject":"anthropics/evals","predicate":"matches data-business lanes","object":"Evals and quality","text":"anthropics/evals matches data-business lanes Evals and quality."},{"subject":"anthropics/evals","predicate":"has captured page count","object":"1","text":"anthropics/evals has captured page count 1."},{"subject":"anthropics/evals","predicate":"has readable page count","object":"1","text":"anthropics/evals has readable page count 1."},{"subject":"anthropics/evals","predicate":"has related signal count","object":"6","text":"anthropics/evals has related signal count 6."},{"subject":"anthropics/evals","predicate":"has analysis playbook objective","object":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","text":"anthropics/evals has analysis playbook objective Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.."},{"subject":"anthropics/evals","predicate":"has source host","object":"github.com","text":"anthropics/evals has source host github.com."},{"subject":"anthropics/evals","predicate":"has lab","object":"Anthropic","text":"anthropics/evals has lab Anthropic."},{"subject":"anthropics/evals","predicate":"has signal desk","object":"repos","text":"anthropics/evals has signal desk repos."},{"subject":"anthropics/evals","predicate":"has source host","object":"github.com","text":"anthropics/evals has source host github.com."},{"subject":"anthropics/evals","predicate":"has repository","object":"anthropics/evals","text":"anthropics/evals has repository anthropics/evals."},{"subject":"anthropics/evals","predicate":"has stars","object":"395","text":"anthropics/evals has stars 395."},{"subject":"anthropics/evals","predicate":"has radar lane","object":"Evals and quality","text":"anthropics/evals has radar lane Evals and quality."},{"subject":"anthropics/evals","predicate":"has matched term","object":"eval","text":"anthropics/evals has matched term eval."},{"subject":"anthropics/evals","predicate":"has matched term","object":"evals","text":"anthropics/evals has matched term evals."}]},"intelligence":{"signal_desk":"repos","answer":"Anthropic published anthropics/evals. This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo anthropics/evals · anthropics/evals License: CC-BY-4.0 Stars: 395 Forks: 46 Open issues: 1 Created: 2022-12-12T22:41:18Z Pushed: 2024-07-02T07:36:39Z Default branch: main Fork: no.... onlylabs links this event to 1 captured evidence page and 6 related repo signals. It also maps to Evals and quality in the data-business radar.","semantic_triples":[{"subject":"Anthropic","predicate":"published repo","object":"anthropics/evals","text":"Anthropic published repo anthropics/evals."},{"subject":"anthropics/evals","predicate":"is classified as","object":"repo signal","text":"anthropics/evals is classified as repo signal."},{"subject":"anthropics/evals","predicate":"belongs to","object":"repos desk","text":"anthropics/evals belongs to repos desk."},{"subject":"anthropics/evals","predicate":"has evidence coverage","object":"1 captured evidence page","text":"anthropics/evals has evidence coverage 1 captured evidence page."},{"subject":"anthropics/evals","predicate":"matches data-business lanes","object":"Evals and quality","text":"anthropics/evals matches data-business lanes Evals and quality."}]},"signal":{"id":"02948669-b2f1-46f6-b3ee-5c5f19245c69","url":"https://onlylabs.fyi/signals/02948669-b2f1-46f6-b3ee-5c5f19245c69","json_url":"https://onlylabs.fyi/signals/02948669-b2f1-46f6-b3ee-5c5f19245c69/signal.json","source_url":"https://github.com/anthropics/evals","title":"anthropics/evals","summary":"Anthropic published a new repository. onlylabs watches repos for tooling, eval, infra, and model-adjacent work.","context":null,"kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2022-12-12T22:41:18+00:00","first_seen_at":"2026-06-05T05:42:58.841369+00:00","date_source":"source","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/anthropics/evals"]},"facets":{"repo":"anthropics/evals"},"traction":{"github_stars":395,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals"}],"score":16,"matched_terms":["eval","evals"],"reason":"Anthropic has a repo signal matching evals and quality."}},"primary_evidence_page":{"url":"https://github.com/anthropics/evals","final_url":"https://github.com/anthropics/evals","title":"anthropics/evals repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T03:59:55.6175+00:00","bytes":11896,"raw_path":"49b1f862554d23d195f1a58376e46a56d148d5d52e25d1d7db6d7fd4d5dc853e.json","content_hash":"f12aa20e376d5a77f735df10fad0699198c63ec39ec1480dec8788cc2fd51ae7","excerpt_chars":1200,"truncated":true,"excerpt":"anthropics/evals License: CC-BY-4.0 Stars: 395 Forks: 46 Open issues: 1 Created: 2022-12-12T22:41:18Z Pushed: 2024-07-02T07:36:39Z Default branch: main Fork: no Archived: no README: Model-Written Evaluation Datasets This repository includes datasets written by language models, used in our paper on \"Discovering Language Model Behaviors with Model-Written Evaluations.\" We intend the datasets to be useful to: 1. Those who are interested in understanding the quality and properties of model-generated data 2. Those who wish to use our datasets to evaluate other models for the behaviors we examined in our work (e.g., related to model persona, sycophancy, advanced AI risks, and gender bias) The evaluations were generated to be asked to dialogue agents (e.g., a model finetuned explicitly respond to a user's utterances, or a pretrained language model prompted to behave like a dialogue agent). However, it is possible to adapt the data to test other kinds of models as well. We describe each of our collections of datasets below: 1. `persona/`: Datasets testing models for various aspects of their behavior related to their stated political and religious views, personality, moral beliefs, and..."},"evidence_pages":[{"url":"https://github.com/anthropics/evals","final_url":"https://github.com/anthropics/evals","title":"anthropics/evals repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T03:59:55.6175+00:00","bytes":11896,"raw_path":"49b1f862554d23d195f1a58376e46a56d148d5d52e25d1d7db6d7fd4d5dc853e.json","content_hash":"f12aa20e376d5a77f735df10fad0699198c63ec39ec1480dec8788cc2fd51ae7","excerpt_chars":1200,"truncated":true,"excerpt":"anthropics/evals License: CC-BY-4.0 Stars: 395 Forks: 46 Open issues: 1 Created: 2022-12-12T22:41:18Z Pushed: 2024-07-02T07:36:39Z Default branch: main Fork: no Archived: no README: Model-Written Evaluation Datasets This repository includes datasets written by language models, used in our paper on \"Discovering Language Model Behaviors with Model-Written Evaluations.\" We intend the datasets to be useful to: 1. Those who are interested in understanding the quality and properties of model-generated data 2. Those who wish to use our datasets to evaluate other models for the behaviors we examined in our work (e.g., related to model persona, sycophancy, advanced AI risks, and gender bias) The evaluations were generated to be asked to dialogue agents (e.g., a model finetuned explicitly respond to a user's utterances, or a pretrained language model prompted to behave like a dialogue agent). However, it is possible to adapt the data to test other kinds of models as well. We describe each of our collections of datasets below: 1. `persona/`: Datasets testing models for various aspects of their behavior related to their stated political and religious views, personality, moral beliefs, and..."}],"related_signals":[{"id":"d655a3de-1616-4e38-b9a0-27254762c1f0","url":"https://onlylabs.fyi/signals/d655a3de-1616-4e38-b9a0-27254762c1f0","source_url":"https://github.com/anthropics/defending-code-reference-harness","title":"anthropics/defending-code-reference-harness","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-05-22T16:00:56+00:00","first_seen_at":"2026-06-05T05:42:58.841369+00:00","date_source":"source"},{"id":"f91bba87-e295-4641-8aa0-9fa254e149b1","url":"https://onlylabs.fyi/signals/f91bba87-e295-4641-8aa0-9fa254e149b1","source_url":"https://github.com/anthropics/ClaudeForFoundationModels","title":"anthropics/ClaudeForFoundationModels","context":"Swift","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-05-20T15:41:18+00:00","first_seen_at":"2026-06-09T07:00:07.44979+00:00","date_source":"source"},{"id":"7f417861-19b7-493f-b6d7-bf9ef54a9a1f","url":"https://onlylabs.fyi/signals/7f417861-19b7-493f-b6d7-bf9ef54a9a1f","source_url":"https://github.com/anthropics/cargo-nix-plugin","title":"anthropics/cargo-nix-plugin","context":"Rust","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-05-19T17:07:44+00:00","first_seen_at":"2026-06-05T05:42:58.841369+00:00","date_source":"source"},{"id":"0c515bfd-ccd4-4a21-868f-944fd4945f19","url":"https://onlylabs.fyi/signals/0c515bfd-ccd4-4a21-868f-944fd4945f19","source_url":"https://github.com/anthropics/scone-bench","title":"anthropics/scone-bench","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-05-12T22:17:19+00:00","first_seen_at":"2026-06-05T05:42:58.841369+00:00","date_source":"source"},{"id":"7f529da9-d1d8-4424-aaf5-face018ae4f8","url":"https://onlylabs.fyi/signals/7f529da9-d1d8-4424-aaf5-face018ae4f8","source_url":"https://github.com/anthropics/html-effectiveness","title":"anthropics/html-effectiveness","context":"HTML","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-05-12T06:25:16+00:00","first_seen_at":"2026-06-05T05:42:58.841369+00:00","date_source":"source"},{"id":"3ea41c6f-444c-4f24-a8f7-ad037263bde0","url":"https://onlylabs.fyi/signals/3ea41c6f-444c-4f24-a8f7-ad037263bde0","source_url":"https://github.com/anthropics/cwc-workshops","title":"anthropics/cwc-workshops","context":"TypeScript","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-05-06T03:53:01+00:00","first_seen_at":"2026-06-05T05:42:58.841369+00:00","date_source":"source"}]}