{"schema_version":"onlylabs.public_signal.v1","title":"Meta AI (Llama) Repo: meta-llama/synthetic-data-kit","description":"Meta AI (Llama) repo signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/a67ffe41-2e85-474d-b24c-6a0fe25d4388","json_url":"https://onlylabs.fyi/signals/a67ffe41-2e85-474d-b24c-6a0fe25d4388/signal.json","generated_at":"2026-06-11T03:59:57.537835+00:00","org":{"slug":"meta-ai","name":"Meta AI (Llama)","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/meta-ai","dossier_json_url":"https://onlylabs.fyi/labs/meta-ai/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/a67ffe41-2e85-474d-b24c-6a0fe25d4388","signal_json":"https://onlylabs.fyi/signals/a67ffe41-2e85-474d-b24c-6a0fe25d4388/signal.json","source":"https://github.com/meta-llama/synthetic-data-kit","lab_dossier":"https://onlylabs.fyi/labs/meta-ai","lab_dossier_json":"https://onlylabs.fyi/labs/meta-ai/dossier.json","analysis":"https://onlylabs.fyi/analysis/meta-ai","analysis_json":"https://onlylabs.fyi/analysis/meta-ai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/meta-ai/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":null,"topic_signals_json":null,"topic_feed":null,"data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"data","label":"Data demand","url":"https://onlylabs.fyi/data-radar/data","json_url":"https://onlylabs.fyi/data-radar/data/signals.json"},{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}]}},"answer_pack":{"answer":"Meta AI (Llama) published meta-llama/synthetic-data-kit (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo meta-llama/synthetic-data-kit · language Python · Solid repo from Meta with decent traction.. onlylabs links this event to 1 captured evidence page and 6 related repo signals. It also maps to Data demand, Evals and quality in the data-business radar.","signal_desk":"repos","source_context":{"source_url":"https://github.com/meta-llama/synthetic-data-kit","source_host":"github.com","occurred_at":"2025-03-27T06:40:42+00:00","first_seen_at":"2026-06-05T05:42:58.710806+00:00","date_source":"source","context":"Python"},"context_markers":[{"label":"Lab","value":"Meta AI (Llama)","source":"signal"},{"label":"Signal desk","value":"repos","source":"signal"},{"label":"Source host","value":"github.com","source":"source"},{"label":"Repository","value":"meta-llama/synthetic-data-kit","source":"source"},{"label":"Language","value":"Python","source":"source"},{"label":"Stars","value":"1,597","source":"traction"},{"label":"Notability","value":"Solid repo from Meta with decent traction.","source":"signal"},{"label":"Radar lane","value":"Data demand","source":"radar"},{"label":"Radar lane","value":"Evals and quality","source":"radar"},{"label":"Matched term","value":"data","source":"radar"},{"label":"Matched term","value":"dataset","source":"radar"},{"label":"Matched term","value":"datasets","source":"radar"},{"label":"Matched term","value":"quality","source":"radar"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Agents and tool use","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/meta-llama/synthetic-data-kit"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-11T03:59:57.537835+00:00"},"data_business":{"matches":true,"lanes":[{"key":"data","label":"Data demand","url":"https://onlylabs.fyi/data-radar/data","json_url":"https://onlylabs.fyi/data-radar/data/signals.json"},{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}],"matched_terms":["data","dataset","datasets","quality"],"score":30,"reason":"Meta AI (Llama) has a repo signal matching data demand, evals and quality."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/a67ffe41-2e85-474d-b24c-6a0fe25d4388/signal.json","dossier_json":"https://onlylabs.fyi/labs/meta-ai/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/meta-ai/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/meta-ai/evidence.json","topic_signals_json":null,"topic_feed":null,"category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","evidence_focus":["repo name","owner","description","language","stars","source URL","first seen time","data, eval, infra, safety, and product terms"],"extraction_questions":["What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Is the repo new evidence for a lab direction that is not yet in writing or releases?","Which related signals should an analyst inspect next?"],"signal_questions":["What does this new repository reveal before a formal announcement exists?","What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Which data-business lane explains this signal: Data demand, Evals and quality?","Do the 6 related repo signals show a repeated pattern?"],"output_fields":["org","repo","technical_theme","data_business_lane","evidence_url"],"data_business_relevance":"New repositories can expose organization build priorities early, especially around internal tooling, eval infrastructure, data systems, deployment, and agent workflows.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/a67ffe41-2e85-474d-b24c-6a0fe25d4388/signal.json","required":true},{"label":"source","url":"https://github.com/meta-llama/synthetic-data-kit","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/meta-ai/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/meta-ai/evidence.json","required":true},{"label":"topic_signals_json","url":null,"required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze Meta AI (Llama)'s repo signal \"meta-llama/synthetic-data-kit\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"Meta AI (Llama)","predicate":"published repo","object":"meta-llama/synthetic-data-kit","text":"Meta AI (Llama) published repo meta-llama/synthetic-data-kit."},{"subject":"meta-llama/synthetic-data-kit","predicate":"is classified as","object":"repo signal","text":"meta-llama/synthetic-data-kit is classified as repo signal."},{"subject":"meta-llama/synthetic-data-kit","predicate":"belongs to","object":"repos desk","text":"meta-llama/synthetic-data-kit belongs to repos desk."},{"subject":"meta-llama/synthetic-data-kit","predicate":"has context","object":"Python","text":"meta-llama/synthetic-data-kit has context Python."},{"subject":"meta-llama/synthetic-data-kit","predicate":"has evidence coverage","object":"1 captured evidence page","text":"meta-llama/synthetic-data-kit has evidence coverage 1 captured evidence page."},{"subject":"meta-llama/synthetic-data-kit","predicate":"matches data-business lanes","object":"Data demand, Evals and quality","text":"meta-llama/synthetic-data-kit matches data-business lanes Data demand, Evals and quality."},{"subject":"meta-llama/synthetic-data-kit","predicate":"has captured page count","object":"1","text":"meta-llama/synthetic-data-kit has captured page count 1."},{"subject":"meta-llama/synthetic-data-kit","predicate":"has readable page count","object":"1","text":"meta-llama/synthetic-data-kit has readable page count 1."},{"subject":"meta-llama/synthetic-data-kit","predicate":"has related signal count","object":"6","text":"meta-llama/synthetic-data-kit has related signal count 6."},{"subject":"meta-llama/synthetic-data-kit","predicate":"has analysis playbook objective","object":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","text":"meta-llama/synthetic-data-kit has analysis playbook objective Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.."},{"subject":"meta-llama/synthetic-data-kit","predicate":"has source host","object":"github.com","text":"meta-llama/synthetic-data-kit has source host github.com."},{"subject":"meta-llama/synthetic-data-kit","predicate":"has lab","object":"Meta AI (Llama)","text":"meta-llama/synthetic-data-kit has lab Meta AI (Llama)."},{"subject":"meta-llama/synthetic-data-kit","predicate":"has signal desk","object":"repos","text":"meta-llama/synthetic-data-kit has signal desk repos."},{"subject":"meta-llama/synthetic-data-kit","predicate":"has source host","object":"github.com","text":"meta-llama/synthetic-data-kit has source host github.com."},{"subject":"meta-llama/synthetic-data-kit","predicate":"has repository","object":"meta-llama/synthetic-data-kit","text":"meta-llama/synthetic-data-kit has repository meta-llama/synthetic-data-kit."},{"subject":"meta-llama/synthetic-data-kit","predicate":"has language","object":"Python","text":"meta-llama/synthetic-data-kit has language Python."},{"subject":"meta-llama/synthetic-data-kit","predicate":"has stars","object":"1,597","text":"meta-llama/synthetic-data-kit has stars 1,597."},{"subject":"meta-llama/synthetic-data-kit","predicate":"has notability","object":"Solid repo from Meta with decent traction.","text":"meta-llama/synthetic-data-kit has notability Solid repo from Meta with decent traction.."},{"subject":"meta-llama/synthetic-data-kit","predicate":"has radar lane","object":"Data demand","text":"meta-llama/synthetic-data-kit has radar lane Data demand."}]},"intelligence":{"signal_desk":"repos","answer":"Meta AI (Llama) published meta-llama/synthetic-data-kit (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo meta-llama/synthetic-data-kit · language Python · Solid repo from Meta with decent traction.. onlylabs links this event to 1 captured evidence page and 6 related repo signals. It also maps to Data demand, Evals and quality in the data-business radar.","semantic_triples":[{"subject":"Meta AI (Llama)","predicate":"published repo","object":"meta-llama/synthetic-data-kit","text":"Meta AI (Llama) published repo meta-llama/synthetic-data-kit."},{"subject":"meta-llama/synthetic-data-kit","predicate":"is classified as","object":"repo signal","text":"meta-llama/synthetic-data-kit is classified as repo signal."},{"subject":"meta-llama/synthetic-data-kit","predicate":"belongs to","object":"repos desk","text":"meta-llama/synthetic-data-kit belongs to repos desk."},{"subject":"meta-llama/synthetic-data-kit","predicate":"has context","object":"Python","text":"meta-llama/synthetic-data-kit has context Python."},{"subject":"meta-llama/synthetic-data-kit","predicate":"has evidence coverage","object":"1 captured evidence page","text":"meta-llama/synthetic-data-kit has evidence coverage 1 captured evidence page."},{"subject":"meta-llama/synthetic-data-kit","predicate":"matches data-business lanes","object":"Data demand, Evals and quality","text":"meta-llama/synthetic-data-kit matches data-business lanes Data demand, Evals and quality."}]},"signal":{"id":"a67ffe41-2e85-474d-b24c-6a0fe25d4388","url":"https://onlylabs.fyi/signals/a67ffe41-2e85-474d-b24c-6a0fe25d4388","json_url":"https://onlylabs.fyi/signals/a67ffe41-2e85-474d-b24c-6a0fe25d4388/signal.json","source_url":"https://github.com/meta-llama/synthetic-data-kit","title":"meta-llama/synthetic-data-kit","summary":"Meta AI (Llama) published a new repository. onlylabs watches repos for tooling, eval, infra, and model-adjacent work.","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"meta-ai","name":"Meta AI (Llama)","category":"frontier-lab"},"occurred_at":"2025-03-27T06:40:42+00:00","first_seen_at":"2026-06-05T05:42:58.710806+00:00","date_source":"source","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/meta-llama/synthetic-data-kit"]},"facets":{"repo":"meta-llama/synthetic-data-kit","language":"Python"},"traction":{"github_stars":1597,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"data","label":"Data demand","url":"https://onlylabs.fyi/data-radar/data"},{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals"}],"score":30,"matched_terms":["data","dataset","datasets","quality"],"reason":"Meta AI (Llama) has a repo signal matching data demand, evals and quality."}},"primary_evidence_page":{"url":"https://github.com/meta-llama/synthetic-data-kit","final_url":"https://github.com/meta-llama/synthetic-data-kit","title":"meta-llama/synthetic-data-kit repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T03:59:57.537835+00:00","bytes":22301,"raw_path":"b98050e08978de009b9ce3d39c2504456f206520b8e1c56eb0f5bd78e583d8ec.json","content_hash":"a9f5da1f867d1720a2d64bf9cc7a73a7ff386298ddba50cd05236eeb42352a1e","excerpt_chars":1200,"truncated":true,"excerpt":"meta-llama/synthetic-data-kit Description: Tool for generating high quality Synthetic datasets Language: Python License: MIT Stars: 1597 Forks: 219 Open issues: 48 Created: 2025-03-27T06:40:42Z Pushed: 2025-10-28T20:10:55Z Default branch: main Fork: no Archived: no README: Synthetic Data Kit Tool for generating high-quality synthetic datasets to fine-tune LLMs. Generate Reasoning Traces, QA Pairs, save them to a fine-tuning format with a simple CLI. > [Checkout our guide on using the tool to unlock task-specific reasoning in Llama-3 family](https://github.com/meta-llama/synthetic-data-kit/tree/main/use-cases/adding_reasoning_to_llama_3) What does Synthetic Data Kit offer? Fine-Tuning Large Language Models is easy. There are many mature tools that you can use to fine-tune Llama model family using various post-training techniques. Why target data preparation? Multiple tools support standardized formats. However, most of the times your dataset is not structured in \"user\", \"assistant\" threads or in a certain format that plays well with a fine-tuning packages. This toolkit simplifies the journey of: - Using a LLM (vLLM or any local/external API endpoint) to generate examples - Modular..."},"evidence_pages":[{"url":"https://github.com/meta-llama/synthetic-data-kit","final_url":"https://github.com/meta-llama/synthetic-data-kit","title":"meta-llama/synthetic-data-kit repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T03:59:57.537835+00:00","bytes":22301,"raw_path":"b98050e08978de009b9ce3d39c2504456f206520b8e1c56eb0f5bd78e583d8ec.json","content_hash":"a9f5da1f867d1720a2d64bf9cc7a73a7ff386298ddba50cd05236eeb42352a1e","excerpt_chars":1200,"truncated":true,"excerpt":"meta-llama/synthetic-data-kit Description: Tool for generating high quality Synthetic datasets Language: Python License: MIT Stars: 1597 Forks: 219 Open issues: 48 Created: 2025-03-27T06:40:42Z Pushed: 2025-10-28T20:10:55Z Default branch: main Fork: no Archived: no README: Synthetic Data Kit Tool for generating high-quality synthetic datasets to fine-tune LLMs. Generate Reasoning Traces, QA Pairs, save them to a fine-tuning format with a simple CLI. > [Checkout our guide on using the tool to unlock task-specific reasoning in Llama-3 family](https://github.com/meta-llama/synthetic-data-kit/tree/main/use-cases/adding_reasoning_to_llama_3) What does Synthetic Data Kit offer? Fine-Tuning Large Language Models is easy. There are many mature tools that you can use to fine-tune Llama model family using various post-training techniques. Why target data preparation? Multiple tools support standardized formats. However, most of the times your dataset is not structured in \"user\", \"assistant\" threads or in a certain format that plays well with a fine-tuning packages. This toolkit simplifies the journey of: - Using a LLM (vLLM or any local/external API endpoint) to generate examples - Modular..."}],"related_signals":[{"id":"b4abb783-3e1b-4bfa-8a43-bde0288e8883","url":"https://onlylabs.fyi/signals/b4abb783-3e1b-4bfa-8a43-bde0288e8883","source_url":"https://github.com/meta-llama/llama-verifications","title":"meta-llama/llama-verifications","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"meta-ai","name":"Meta AI (Llama)","category":"frontier-lab"},"occurred_at":"2025-05-13T23:02:38+00:00","first_seen_at":"2026-06-05T05:42:58.710806+00:00","date_source":"source"},{"id":"a62e2556-986f-4f97-879e-b7b70a1236ca","url":"https://onlylabs.fyi/signals/a62e2556-986f-4f97-879e-b7b70a1236ca","source_url":"https://github.com/meta-llama/llama-api-typescript","title":"meta-llama/llama-api-typescript","context":"TypeScript","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"meta-ai","name":"Meta AI (Llama)","category":"frontier-lab"},"occurred_at":"2025-04-02T23:05:49+00:00","first_seen_at":"2026-06-05T05:42:58.710806+00:00","date_source":"source"},{"id":"76516548-8a38-406d-8600-ab5fc5ec47ea","url":"https://onlylabs.fyi/signals/76516548-8a38-406d-8600-ab5fc5ec47ea","source_url":"https://github.com/meta-llama/llama-api-python","title":"meta-llama/llama-api-python","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"meta-ai","name":"Meta AI (Llama)","category":"frontier-lab"},"occurred_at":"2025-03-24T17:10:39+00:00","first_seen_at":"2026-06-05T05:42:58.710806+00:00","date_source":"source"},{"id":"9b8bac29-4f67-4376-bf2d-cb43bfd4551a","url":"https://onlylabs.fyi/signals/9b8bac29-4f67-4376-bf2d-cb43bfd4551a","source_url":"https://github.com/meta-llama/prompt-ops","title":"meta-llama/prompt-ops","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"meta-ai","name":"Meta AI (Llama)","category":"frontier-lab"},"occurred_at":"2025-03-14T17:59:40+00:00","first_seen_at":"2026-06-05T05:42:58.710806+00:00","date_source":"source"},{"id":"cc4b8209-54b4-4b31-9572-261652b4f87a","url":"https://onlylabs.fyi/signals/cc4b8209-54b4-4b31-9572-261652b4f87a","source_url":"https://github.com/meta-llama/llama-stack-ops","title":"meta-llama/llama-stack-ops","context":"Shell","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"meta-ai","name":"Meta AI (Llama)","category":"frontier-lab"},"occurred_at":"2025-01-28T14:34:10+00:00","first_seen_at":"2026-06-05T05:42:58.710806+00:00","date_source":"source"},{"id":"57becf89-298e-4dff-9357-1d7df42851d9","url":"https://onlylabs.fyi/signals/57becf89-298e-4dff-9357-1d7df42851d9","source_url":"https://github.com/meta-llama/llama-models","title":"meta-llama/llama-models","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"meta-ai","name":"Meta AI (Llama)","category":"frontier-lab"},"occurred_at":"2024-06-27T22:14:09+00:00","first_seen_at":"2026-06-05T05:42:58.710806+00:00","date_source":"source"}]}