{"schema_version":"onlylabs.public_signal.v1","title":"ByteDance (Doubao/Seed) Repo: ByteDance-Seed/DATAMASK","description":"ByteDance (Doubao/Seed) repo signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/4ed8e3b1-9a26-4ca9-95d1-d2f2621e6e02","json_url":"https://onlylabs.fyi/signals/4ed8e3b1-9a26-4ca9-95d1-d2f2621e6e02/signal.json","generated_at":"2026-06-11T03:03:07.872635+00:00","org":{"slug":"bytedance","name":"ByteDance (Doubao/Seed)","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/bytedance","dossier_json_url":"https://onlylabs.fyi/labs/bytedance/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/4ed8e3b1-9a26-4ca9-95d1-d2f2621e6e02","signal_json":"https://onlylabs.fyi/signals/4ed8e3b1-9a26-4ca9-95d1-d2f2621e6e02/signal.json","source":"https://github.com/ByteDance-Seed/DATAMASK","lab_dossier":"https://onlylabs.fyi/labs/bytedance","lab_dossier_json":"https://onlylabs.fyi/labs/bytedance/dossier.json","analysis":"https://onlylabs.fyi/analysis/bytedance","analysis_json":"https://onlylabs.fyi/analysis/bytedance/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/bytedance/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":null,"topic_signals_json":null,"topic_feed":null,"data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"data","label":"Data demand","url":"https://onlylabs.fyi/data-radar/data","json_url":"https://onlylabs.fyi/data-radar/data/signals.json"},{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"},{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety","json_url":"https://onlylabs.fyi/data-radar/safety/signals.json"}]}},"answer_pack":{"answer":"ByteDance (Doubao/Seed) published ByteDance-Seed/DATAMASK (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo ByteDance-Seed/DATAMASK · language Python · New repo, low traction.. onlylabs links this event to 1 captured evidence page and 6 related repo signals. It also maps to Data demand, Infrastructure, Safety and policy in the data-business radar.","signal_desk":"repos","source_context":{"source_url":"https://github.com/ByteDance-Seed/DATAMASK","source_host":"github.com","occurred_at":"2025-12-29T11:31:57+00:00","first_seen_at":"2026-06-05T20:58:46.965734+00:00","date_source":"source","context":"Python"},"context_markers":[{"label":"Lab","value":"ByteDance (Doubao/Seed)","source":"signal"},{"label":"Signal desk","value":"repos","source":"signal"},{"label":"Source host","value":"github.com","source":"source"},{"label":"Repository","value":"ByteDance-Seed/DATAMASK","source":"source"},{"label":"Language","value":"Python","source":"source"},{"label":"Stars","value":"18","source":"traction"},{"label":"Notability","value":"New repo, low traction.","source":"signal"},{"label":"Radar lane","value":"Data demand","source":"radar"},{"label":"Radar lane","value":"Infrastructure","source":"radar"},{"label":"Radar lane","value":"Safety and policy","source":"radar"},{"label":"Matched term","value":"data","source":"radar"},{"label":"Matched term","value":"training","source":"radar"},{"label":"Matched term","value":"policy","source":"radar"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Safety and alignment","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/ByteDance-Seed/DATAMASK"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-11T03:03:07.872635+00:00"},"data_business":{"matches":true,"lanes":[{"key":"data","label":"Data demand","url":"https://onlylabs.fyi/data-radar/data","json_url":"https://onlylabs.fyi/data-radar/data/signals.json"},{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"},{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety","json_url":"https://onlylabs.fyi/data-radar/safety/signals.json"}],"matched_terms":["data","training","policy"],"score":38,"reason":"ByteDance (Doubao/Seed) has a repo signal matching data demand, infrastructure, safety and policy."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/4ed8e3b1-9a26-4ca9-95d1-d2f2621e6e02/signal.json","dossier_json":"https://onlylabs.fyi/labs/bytedance/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/bytedance/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/bytedance/evidence.json","topic_signals_json":null,"topic_feed":null,"category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","evidence_focus":["repo name","owner","description","language","stars","source URL","first seen time","data, eval, infra, safety, and product terms"],"extraction_questions":["What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Is the repo new evidence for a lab direction that is not yet in writing or releases?","Which related signals should an analyst inspect next?"],"signal_questions":["What does this new repository reveal before a formal announcement exists?","What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Which data-business lane explains this signal: Data demand, Infrastructure, Safety and policy?","Do the 6 related repo signals show a repeated pattern?"],"output_fields":["org","repo","technical_theme","data_business_lane","evidence_url"],"data_business_relevance":"New repositories can expose organization build priorities early, especially around internal tooling, eval infrastructure, data systems, deployment, and agent workflows.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/4ed8e3b1-9a26-4ca9-95d1-d2f2621e6e02/signal.json","required":true},{"label":"source","url":"https://github.com/ByteDance-Seed/DATAMASK","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/bytedance/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/bytedance/evidence.json","required":true},{"label":"topic_signals_json","url":null,"required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze ByteDance (Doubao/Seed)'s repo signal \"ByteDance-Seed/DATAMASK\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"ByteDance (Doubao/Seed)","predicate":"published repo","object":"ByteDance-Seed/DATAMASK","text":"ByteDance (Doubao/Seed) published repo ByteDance-Seed/DATAMASK."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"is classified as","object":"repo signal","text":"ByteDance-Seed/DATAMASK is classified as repo signal."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"belongs to","object":"repos desk","text":"ByteDance-Seed/DATAMASK belongs to repos desk."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"has context","object":"Python","text":"ByteDance-Seed/DATAMASK has context Python."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"has evidence coverage","object":"1 captured evidence page","text":"ByteDance-Seed/DATAMASK has evidence coverage 1 captured evidence page."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"matches data-business lanes","object":"Data demand, Infrastructure, Safety and policy","text":"ByteDance-Seed/DATAMASK matches data-business lanes Data demand, Infrastructure, Safety and policy."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"has captured page count","object":"1","text":"ByteDance-Seed/DATAMASK has captured page count 1."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"has readable page count","object":"1","text":"ByteDance-Seed/DATAMASK has readable page count 1."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"has related signal count","object":"6","text":"ByteDance-Seed/DATAMASK has related signal count 6."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"has analysis playbook objective","object":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","text":"ByteDance-Seed/DATAMASK has analysis playbook objective Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"has source host","object":"github.com","text":"ByteDance-Seed/DATAMASK has source host github.com."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"has lab","object":"ByteDance (Doubao/Seed)","text":"ByteDance-Seed/DATAMASK has lab ByteDance (Doubao/Seed)."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"has signal desk","object":"repos","text":"ByteDance-Seed/DATAMASK has signal desk repos."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"has source host","object":"github.com","text":"ByteDance-Seed/DATAMASK has source host github.com."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"has repository","object":"ByteDance-Seed/DATAMASK","text":"ByteDance-Seed/DATAMASK has repository ByteDance-Seed/DATAMASK."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"has language","object":"Python","text":"ByteDance-Seed/DATAMASK has language Python."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"has stars","object":"18","text":"ByteDance-Seed/DATAMASK has stars 18."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"has notability","object":"New repo, low traction.","text":"ByteDance-Seed/DATAMASK has notability New repo, low traction.."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"has radar lane","object":"Data demand","text":"ByteDance-Seed/DATAMASK has radar lane Data demand."}]},"intelligence":{"signal_desk":"repos","answer":"ByteDance (Doubao/Seed) published ByteDance-Seed/DATAMASK (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo ByteDance-Seed/DATAMASK · language Python · New repo, low traction.. onlylabs links this event to 1 captured evidence page and 6 related repo signals. It also maps to Data demand, Infrastructure, Safety and policy in the data-business radar.","semantic_triples":[{"subject":"ByteDance (Doubao/Seed)","predicate":"published repo","object":"ByteDance-Seed/DATAMASK","text":"ByteDance (Doubao/Seed) published repo ByteDance-Seed/DATAMASK."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"is classified as","object":"repo signal","text":"ByteDance-Seed/DATAMASK is classified as repo signal."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"belongs to","object":"repos desk","text":"ByteDance-Seed/DATAMASK belongs to repos desk."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"has context","object":"Python","text":"ByteDance-Seed/DATAMASK has context Python."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"has evidence coverage","object":"1 captured evidence page","text":"ByteDance-Seed/DATAMASK has evidence coverage 1 captured evidence page."},{"subject":"ByteDance-Seed/DATAMASK","predicate":"matches data-business lanes","object":"Data demand, Infrastructure, Safety and policy","text":"ByteDance-Seed/DATAMASK matches data-business lanes Data demand, Infrastructure, Safety and policy."}]},"signal":{"id":"4ed8e3b1-9a26-4ca9-95d1-d2f2621e6e02","url":"https://onlylabs.fyi/signals/4ed8e3b1-9a26-4ca9-95d1-d2f2621e6e02","json_url":"https://onlylabs.fyi/signals/4ed8e3b1-9a26-4ca9-95d1-d2f2621e6e02/signal.json","source_url":"https://github.com/ByteDance-Seed/DATAMASK","title":"ByteDance-Seed/DATAMASK","summary":"ByteDance (Doubao/Seed) published a new repository. onlylabs watches repos for tooling, eval, infra, and model-adjacent work.","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"bytedance","name":"ByteDance (Doubao/Seed)","category":"frontier-lab"},"occurred_at":"2025-12-29T11:31:57+00:00","first_seen_at":"2026-06-05T20:58:46.965734+00:00","date_source":"source","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/ByteDance-Seed/DATAMASK"]},"facets":{"repo":"ByteDance-Seed/DATAMASK","language":"Python"},"traction":{"github_stars":18,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"data","label":"Data demand","url":"https://onlylabs.fyi/data-radar/data"},{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure"},{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety"}],"score":38,"matched_terms":["data","training","policy"],"reason":"ByteDance (Doubao/Seed) has a repo signal matching data demand, infrastructure, safety and policy."}},"primary_evidence_page":{"url":"https://github.com/ByteDance-Seed/DATAMASK","final_url":"https://github.com/ByteDance-Seed/DATAMASK","title":"ByteDance-Seed/DATAMASK repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T03:03:07.872635+00:00","bytes":17213,"raw_path":"a9946cf41d59b4120d3fb85834cec9adfea448cec0376d6a41d2ac55b3bfc4a0.json","content_hash":"c5c37eec3a865081839ab32e82a56302a4d26dcdc1d49726bcb9d52cd64bedd5","excerpt_chars":1200,"truncated":true,"excerpt":"ByteDance-Seed/DATAMASK Description: Joint Selection for Large-Scale Pre-Training Data via Policy Gradient-based Mask Learning Language: Python License: Apache-2.0 Stars: 18 Forks: 0 Open issues: 0 Created: 2025-12-29T11:31:57Z Pushed: 2026-01-04T03:15:45Z Default branch: main Fork: no Archived: no README: <img src=\"./figs/datamask_icon.png\" width=\"50\"> DATAMASK English | [中文README](README.zh_CN.md) <p align=\"center\" width=\"100%\"> </p> <div id=\"top\" align=\"center\"> Joint Selection for Large-Scale Pre-Training Data via Policy Gradient-based Mask Learning ----------------------------- <img src=\"https://img.shields.io/badge/Version-1.0.0-blue.svg\" alt=\"Version\"> <img src=\"https://img.shields.io/badge/License-Apache_2.0-green.svg\" alt=\"License\"> <h4> |<a href=\"https://arxiv.org/abs/2512.24265\"> 📑 Paper </a> | <a href=\"https://huggingface.co/datasets/DATA-MASK/FineWeb-Mask\"> 🐱 FineWeb-Mask Data </a> | </h4> <!-- **Authors:** --> **Ziqing Fan**<sup>1,2 </sup>, Yuqiao Xian<sup>1,\\* </sup>, Yan Sun<sup>3</sup>, Li Shen<sup>4 <!-- **Affiliations:** --> _<sup>1</sup> ByteDance Seed, <sup>2</sup> Shanghai Jiao Tong University, <sup>3</sup> University of Sydney, <sup>4</sup> Sun Yat-sen..."},"evidence_pages":[{"url":"https://github.com/ByteDance-Seed/DATAMASK","final_url":"https://github.com/ByteDance-Seed/DATAMASK","title":"ByteDance-Seed/DATAMASK repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T03:03:07.872635+00:00","bytes":17213,"raw_path":"a9946cf41d59b4120d3fb85834cec9adfea448cec0376d6a41d2ac55b3bfc4a0.json","content_hash":"c5c37eec3a865081839ab32e82a56302a4d26dcdc1d49726bcb9d52cd64bedd5","excerpt_chars":1200,"truncated":true,"excerpt":"ByteDance-Seed/DATAMASK Description: Joint Selection for Large-Scale Pre-Training Data via Policy Gradient-based Mask Learning Language: Python License: Apache-2.0 Stars: 18 Forks: 0 Open issues: 0 Created: 2025-12-29T11:31:57Z Pushed: 2026-01-04T03:15:45Z Default branch: main Fork: no Archived: no README: <img src=\"./figs/datamask_icon.png\" width=\"50\"> DATAMASK English | [中文README](README.zh_CN.md) <p align=\"center\" width=\"100%\"> </p> <div id=\"top\" align=\"center\"> Joint Selection for Large-Scale Pre-Training Data via Policy Gradient-based Mask Learning ----------------------------- <img src=\"https://img.shields.io/badge/Version-1.0.0-blue.svg\" alt=\"Version\"> <img src=\"https://img.shields.io/badge/License-Apache_2.0-green.svg\" alt=\"License\"> <h4> |<a href=\"https://arxiv.org/abs/2512.24265\"> 📑 Paper </a> | <a href=\"https://huggingface.co/datasets/DATA-MASK/FineWeb-Mask\"> 🐱 FineWeb-Mask Data </a> | </h4> <!-- **Authors:** --> **Ziqing Fan**<sup>1,2 </sup>, Yuqiao Xian<sup>1,\\* </sup>, Yan Sun<sup>3</sup>, Li Shen<sup>4 <!-- **Affiliations:** --> _<sup>1</sup> ByteDance Seed, <sup>2</sup> Shanghai Jiao Tong University, <sup>3</sup> University of Sydney, <sup>4</sup> Sun Yat-sen..."}],"related_signals":[{"id":"55e0369a-bd79-48f1-a004-0cd4fdfb03e4","url":"https://onlylabs.fyi/signals/55e0369a-bd79-48f1-a004-0cd4fdfb03e4","source_url":"https://github.com/ByteDance-Seed/TaskMem","title":"ByteDance-Seed/TaskMem","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"bytedance","name":"ByteDance (Doubao/Seed)","category":"frontier-lab"},"occurred_at":"2026-05-29T03:03:17+00:00","first_seen_at":"2026-06-05T20:58:46.965734+00:00","date_source":"source"},{"id":"957704c5-24c9-4360-aafa-f8382010ed1d","url":"https://onlylabs.fyi/signals/957704c5-24c9-4360-aafa-f8382010ed1d","source_url":"https://github.com/ByteDance-Seed/Cola-DLM","title":"ByteDance-Seed/Cola-DLM","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"bytedance","name":"ByteDance (Doubao/Seed)","category":"frontier-lab"},"occurred_at":"2026-05-15T07:09:04+00:00","first_seen_at":"2026-06-05T20:58:46.965734+00:00","date_source":"source"},{"id":"22ca2261-1970-4664-95e2-1c4d404c0eb3","url":"https://onlylabs.fyi/signals/22ca2261-1970-4664-95e2-1c4d404c0eb3","source_url":"https://github.com/ByteDance-Seed/THEMol","title":"ByteDance-Seed/THEMol","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"bytedance","name":"ByteDance (Doubao/Seed)","category":"frontier-lab"},"occurred_at":"2026-05-15T05:36:08+00:00","first_seen_at":"2026-06-05T20:58:46.965734+00:00","date_source":"source"},{"id":"6a0854bb-9654-4e4e-92dc-e8fff1d6c7b9","url":"https://onlylabs.fyi/signals/6a0854bb-9654-4e4e-92dc-e8fff1d6c7b9","source_url":"https://github.com/ByteDance-Seed/felis","title":"ByteDance-Seed/felis","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"bytedance","name":"ByteDance (Doubao/Seed)","category":"frontier-lab"},"occurred_at":"2026-05-06T08:54:43+00:00","first_seen_at":"2026-06-05T20:58:46.965734+00:00","date_source":"source"},{"id":"05e5f295-f84c-41c4-89df-baa3606008b1","url":"https://onlylabs.fyi/signals/05e5f295-f84c-41c4-89df-baa3606008b1","source_url":"https://github.com/ByteDance-Seed/SimArt","title":"ByteDance-Seed/SimArt","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"bytedance","name":"ByteDance (Doubao/Seed)","category":"frontier-lab"},"occurred_at":"2026-04-23T03:25:52+00:00","first_seen_at":"2026-06-05T20:58:46.965734+00:00","date_source":"source"},{"id":"106937f9-35dd-41e2-adde-98e74527e1b8","url":"https://onlylabs.fyi/signals/106937f9-35dd-41e2-adde-98e74527e1b8","source_url":"https://github.com/ByteDance-Seed/In-Place-TTT","title":"ByteDance-Seed/In-Place-TTT","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"bytedance","name":"ByteDance (Doubao/Seed)","category":"frontier-lab"},"occurred_at":"2026-04-07T05:50:45+00:00","first_seen_at":"2026-06-05T20:58:46.965734+00:00","date_source":"source"}]}