{"schema_version":"onlylabs.public_signal.v1","title":"Anthropic Writing: Eval Awareness Browsecomp","description":"Anthropic writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/bd82f150-d63a-478d-92c1-84aac62d64c2","json_url":"https://onlylabs.fyi/signals/bd82f150-d63a-478d-92c1-84aac62d64c2/signal.json","generated_at":"2026-06-11T03:06:11.747142+00:00","org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/anthropic","dossier_json_url":"https://onlylabs.fyi/labs/anthropic/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/bd82f150-d63a-478d-92c1-84aac62d64c2","signal_json":"https://onlylabs.fyi/signals/bd82f150-d63a-478d-92c1-84aac62d64c2/signal.json","source":"https://www.anthropic.com/engineering/eval-awareness-browsecomp","lab_dossier":"https://onlylabs.fyi/labs/anthropic","lab_dossier_json":"https://onlylabs.fyi/labs/anthropic/dossier.json","analysis":"https://onlylabs.fyi/analysis/anthropic","analysis_json":"https://onlylabs.fyi/analysis/anthropic/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/anthropic/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}]}},"answer_pack":{"answer":"Anthropic published Eval Awareness Browsecomp. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Eval awareness in Claude Opus 4.6’s BrowseComp performance \\ Anthropic Engineering at Anthropic Eval awareness in Claude Opus 4.6’s BrowseComp performance Published Mar.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Evals and quality in the data-business radar.","signal_desk":"talking","source_context":{"source_url":"https://www.anthropic.com/engineering/eval-awareness-browsecomp","source_host":"anthropic.com","occurred_at":"2026-03-06T00:00:00.000Z","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"page.visible_date","context":null},"context_markers":[{"label":"Lab","value":"Anthropic","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"anthropic.com","source":"source"},{"label":"Radar lane","value":"Evals and quality","source":"radar"},{"label":"Matched term","value":"eval","source":"radar"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Agents and tool use","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://www.anthropic.com/engineering/eval-awareness-browsecomp"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-11T03:06:11.747142+00:00"},"data_business":{"matches":true,"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}],"matched_terms":["eval"],"score":13,"reason":"Anthropic has a writing signal matching evals and quality."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/bd82f150-d63a-478d-92c1-84aac62d64c2/signal.json","dossier_json":"https://onlylabs.fyi/labs/anthropic/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/anthropic/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/anthropic/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which data-business lane explains this signal: Evals and quality?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/bd82f150-d63a-478d-92c1-84aac62d64c2/signal.json","required":true},{"label":"source","url":"https://www.anthropic.com/engineering/eval-awareness-browsecomp","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/anthropic/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/anthropic/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze Anthropic's writing signal \"Eval Awareness Browsecomp\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"Anthropic","predicate":"published","object":"Eval Awareness Browsecomp","text":"Anthropic published Eval Awareness Browsecomp."},{"subject":"Eval Awareness Browsecomp","predicate":"is classified as","object":"writing signal","text":"Eval Awareness Browsecomp is classified as writing signal."},{"subject":"Eval Awareness Browsecomp","predicate":"belongs to","object":"talking desk","text":"Eval Awareness Browsecomp belongs to talking desk."},{"subject":"Eval Awareness Browsecomp","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Eval Awareness Browsecomp has evidence coverage 1 captured evidence page."},{"subject":"Eval Awareness Browsecomp","predicate":"matches data-business lanes","object":"Evals and quality","text":"Eval Awareness Browsecomp matches data-business lanes Evals and quality."},{"subject":"Eval Awareness Browsecomp","predicate":"has captured page count","object":"1","text":"Eval Awareness Browsecomp has captured page count 1."},{"subject":"Eval Awareness Browsecomp","predicate":"has readable page count","object":"1","text":"Eval Awareness Browsecomp has readable page count 1."},{"subject":"Eval Awareness Browsecomp","predicate":"has related signal count","object":"6","text":"Eval Awareness Browsecomp has related signal count 6."},{"subject":"Eval Awareness Browsecomp","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"Eval Awareness Browsecomp has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.."},{"subject":"Eval Awareness Browsecomp","predicate":"has source host","object":"anthropic.com","text":"Eval Awareness Browsecomp has source host anthropic.com."},{"subject":"Eval Awareness Browsecomp","predicate":"has lab","object":"Anthropic","text":"Eval Awareness Browsecomp has lab Anthropic."},{"subject":"Eval Awareness Browsecomp","predicate":"has signal desk","object":"talking","text":"Eval Awareness Browsecomp has signal desk talking."},{"subject":"Eval Awareness Browsecomp","predicate":"has source host","object":"anthropic.com","text":"Eval Awareness Browsecomp has source host anthropic.com."},{"subject":"Eval Awareness Browsecomp","predicate":"has radar lane","object":"Evals and quality","text":"Eval Awareness Browsecomp has radar lane Evals and quality."},{"subject":"Eval Awareness Browsecomp","predicate":"has matched term","object":"eval","text":"Eval Awareness Browsecomp has matched term eval."},{"subject":"Eval Awareness Browsecomp","predicate":"has watch term","object":"Eval methodology","text":"Eval Awareness Browsecomp has watch term Eval methodology."},{"subject":"Eval Awareness Browsecomp","predicate":"has watch term","object":"Data pipeline","text":"Eval Awareness Browsecomp has watch term Data pipeline."},{"subject":"Eval Awareness Browsecomp","predicate":"has watch term","object":"Infrastructure","text":"Eval Awareness Browsecomp has watch term Infrastructure."}]},"intelligence":{"signal_desk":"talking","answer":"Anthropic published Eval Awareness Browsecomp. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Eval awareness in Claude Opus 4.6’s BrowseComp performance \\ Anthropic Engineering at Anthropic Eval awareness in Claude Opus 4.6’s BrowseComp performance Published Mar.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Evals and quality in the data-business radar.","semantic_triples":[{"subject":"Anthropic","predicate":"published","object":"Eval Awareness Browsecomp","text":"Anthropic published Eval Awareness Browsecomp."},{"subject":"Eval Awareness Browsecomp","predicate":"is classified as","object":"writing signal","text":"Eval Awareness Browsecomp is classified as writing signal."},{"subject":"Eval Awareness Browsecomp","predicate":"belongs to","object":"talking desk","text":"Eval Awareness Browsecomp belongs to talking desk."},{"subject":"Eval Awareness Browsecomp","predicate":"has evidence coverage","object":"1 captured evidence page","text":"Eval Awareness Browsecomp has evidence coverage 1 captured evidence page."},{"subject":"Eval Awareness Browsecomp","predicate":"matches data-business lanes","object":"Evals and quality","text":"Eval Awareness Browsecomp matches data-business lanes Evals and quality."}]},"signal":{"id":"bd82f150-d63a-478d-92c1-84aac62d64c2","url":"https://onlylabs.fyi/signals/bd82f150-d63a-478d-92c1-84aac62d64c2","json_url":"https://onlylabs.fyi/signals/bd82f150-d63a-478d-92c1-84aac62d64c2/signal.json","source_url":"https://www.anthropic.com/engineering/eval-awareness-browsecomp","title":"Eval Awareness Browsecomp","summary":"Anthropic published a writing signal. onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-03-06T00:00:00.000Z","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"page.visible_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://www.anthropic.com/engineering/eval-awareness-browsecomp"]},"facets":{},"traction":{"github_stars":null,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals"}],"score":13,"matched_terms":["eval"],"reason":"Anthropic has a writing signal matching evals and quality."}},"primary_evidence_page":{"url":"https://www.anthropic.com/engineering/eval-awareness-browsecomp","final_url":"https://www.anthropic.com/engineering/eval-awareness-browsecomp","title":"Eval Awareness Browsecomp","http_status":200,"content_type":"text/html; charset=utf-8","capture_method":"plain","fetched_at":"2026-06-11T03:06:11.747142+00:00","bytes":148149,"raw_path":"42468eb1c49ef4da0e4e468e069784f87023def2f43c26fe74ae0396e41d3a2c.html","content_hash":"a87dcd72b081eddda88cbbeb9833560a93b770ef7c99a9243fc0ea71c1f554f0","excerpt_chars":1200,"truncated":true,"excerpt":"Eval awareness in Claude Opus 4.6’s BrowseComp performance \\ Anthropic Engineering at Anthropic Eval awareness in Claude Opus 4.6’s BrowseComp performance Published Mar 06, 2026 Evaluating Opus 4.6 on BrowseComp, we found cases where the model recognized the test, then found and decrypted answers to it—raising questions about eval integrity in web-enabled environments. BrowseComp is an evaluation designed to test how well models can find hard-to-locate information on the web. Like many benchmarks, it is vulnerable to contamination: answers leak onto the public web through academic papers, blog posts, and GitHub issues, and a model running the eval can encounter them in search results. When we evaluated Claude Opus 4.6 on BrowseComp in a multi-agent configuration, we found nine examples of this kind of contamination across 1,266 BrowseComp problems. However, we also witnessed two cases of a novel contamination pattern. Instead of inadvertently coming across a leaked answer, Claude Opus 4.6 independently hypothesized that it was being evaluated, identified which benchmark it was running in, then located and decrypted the answer key. To our knowledge, this is the first documented..."},"evidence_pages":[{"url":"https://www.anthropic.com/engineering/eval-awareness-browsecomp","final_url":"https://www.anthropic.com/engineering/eval-awareness-browsecomp","title":"Eval Awareness Browsecomp","http_status":200,"content_type":"text/html; charset=utf-8","capture_method":"plain","fetched_at":"2026-06-11T03:06:11.747142+00:00","bytes":148149,"raw_path":"42468eb1c49ef4da0e4e468e069784f87023def2f43c26fe74ae0396e41d3a2c.html","content_hash":"a87dcd72b081eddda88cbbeb9833560a93b770ef7c99a9243fc0ea71c1f554f0","excerpt_chars":1200,"truncated":true,"excerpt":"Eval awareness in Claude Opus 4.6’s BrowseComp performance \\ Anthropic Engineering at Anthropic Eval awareness in Claude Opus 4.6’s BrowseComp performance Published Mar 06, 2026 Evaluating Opus 4.6 on BrowseComp, we found cases where the model recognized the test, then found and decrypted answers to it—raising questions about eval integrity in web-enabled environments. BrowseComp is an evaluation designed to test how well models can find hard-to-locate information on the web. Like many benchmarks, it is vulnerable to contamination: answers leak onto the public web through academic papers, blog posts, and GitHub issues, and a model running the eval can encounter them in search results. When we evaluated Claude Opus 4.6 on BrowseComp in a multi-agent configuration, we found nine examples of this kind of contamination across 1,266 BrowseComp problems. However, we also witnessed two cases of a novel contamination pattern. Instead of inadvertently coming across a leaked answer, Claude Opus 4.6 independently hypothesized that it was being evaluated, identified which benchmark it was running in, then located and decrypted the answer key. To our knowledge, this is the first documented..."}],"related_signals":[{"id":"6c78c028-3ab4-4b33-86f7-d86c8ba9e3ba","url":"https://onlylabs.fyi/signals/6c78c028-3ab4-4b33-86f7-d86c8ba9e3ba","source_url":"https://www.anthropic.com/research/agents-in-biology","title":"Agents In Biology","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-10T15:16:01+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"2648db51-9d6a-42a9-aece-a0ca5f9ce64f","url":"https://onlylabs.fyi/signals/2648db51-9d6a-42a9-aece-a0ca5f9ce64f","source_url":"https://www.anthropic.com/news/claude-fable-5-mythos-5","title":"Claude Fable 5 Mythos 5","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-09T20:27:50+00:00","first_seen_at":"2026-06-10T07:01:05.666054+00:00","date_source":"sitemap.lastmod"},{"id":"8475487f-45b4-4689-9bc5-8e4c6ca0457d","url":"https://onlylabs.fyi/signals/8475487f-45b4-4689-9bc5-8e4c6ca0457d","source_url":"https://www.anthropic.com/engineering/how-we-contain-claude","title":"How We Contain Claude","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-06T00:28:16+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"e4fbfcdd-15b4-41b9-b011-fd83e7068ae9","url":"https://onlylabs.fyi/signals/e4fbfcdd-15b4-41b9-b011-fd83e7068ae9","source_url":"https://www.anthropic.com/research/making-claude-a-chemist","title":"Making Claude A Chemist","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T20:13:40+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"cc62deba-9682-4751-aa6b-81c3bd7122a0","url":"https://onlylabs.fyi/signals/cc62deba-9682-4751-aa6b-81c3bd7122a0","source_url":"https://www.anthropic.com/research/measuring-agent-autonomy","title":"Measuring Agent Autonomy","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T15:49:18+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"},{"id":"93da14fd-7141-4e17-abd6-1c8d52435c70","url":"https://onlylabs.fyi/signals/93da14fd-7141-4e17-abd6-1c8d52435c70","source_url":"https://www.anthropic.com/research/values-wild","title":"Values Wild","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"anthropic","name":"Anthropic","category":"frontier-lab"},"occurred_at":"2026-06-05T15:38:54+00:00","first_seen_at":"2026-06-09T02:17:26.339488+00:00","date_source":"sitemap.lastmod"}]}