{"schema_version":"onlylabs.public_signal.v1","title":"SambaNova Systems Repo: sambanova/generative_data_prep","description":"SambaNova Systems repo signal with public source context, captured evidence pages, related signals, and category-scoped analysis context.","url":"https://onlylabs.fyi/signals/82c927c1-c6f1-45de-b7e6-d49c493b7256","json_url":"https://onlylabs.fyi/signals/82c927c1-c6f1-45de-b7e6-d49c493b7256/signal.json","generated_at":"2026-06-11T04:10:35.230398+00:00","org":{"slug":"sambanova","name":"SambaNova Systems","category":"neocloud","category_label":"Neocloud","dossier_url":"https://onlylabs.fyi/labs/sambanova","dossier_json_url":"https://onlylabs.fyi/labs/sambanova/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/82c927c1-c6f1-45de-b7e6-d49c493b7256","signal_json":"https://onlylabs.fyi/signals/82c927c1-c6f1-45de-b7e6-d49c493b7256/signal.json","source":"https://github.com/sambanova/generative_data_prep","lab_dossier":"https://onlylabs.fyi/labs/sambanova","lab_dossier_json":"https://onlylabs.fyi/labs/sambanova/dossier.json","analysis":"https://onlylabs.fyi/analysis/sambanova","analysis_json":"https://onlylabs.fyi/analysis/sambanova/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/sambanova/evidence.json","category":"https://onlylabs.fyi/neoclouds","category_json":"https://onlylabs.fyi/neoclouds.json","category_feed":"https://onlylabs.fyi/neoclouds/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json?category=neocloud","topic":null,"topic_signals_json":null,"topic_feed":null,"data_business":null},"answer_pack":{"answer":"SambaNova Systems published sambanova/generative_data_prep (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo sambanova/generative_data_prep · language Python. onlylabs links this event to 1 captured evidence page and 6 related repo signals.","signal_desk":"repos","source_context":{"source_url":"https://github.com/sambanova/generative_data_prep","source_host":"github.com","occurred_at":"2023-03-28T02:04:40+00:00","first_seen_at":"2026-06-05T22:32:07.650067+00:00","date_source":"source","context":"Python"},"context_markers":[{"label":"Lab","value":"SambaNova Systems","source":"signal"},{"label":"Signal desk","value":"repos","source":"signal"},{"label":"Source host","value":"github.com","source":"source"},{"label":"Repository","value":"sambanova/generative_data_prep","source":"source"},{"label":"Language","value":"Python","source":"source"},{"label":"Stars","value":"67","source":"traction"},{"label":"Watch term","value":"Model card","source":"model"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Safety and alignment","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/sambanova/generative_data_prep"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-11T04:10:35.230398+00:00"},"data_business":{"matches":false,"lanes":[],"matched_terms":[],"score":null,"reason":null},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/82c927c1-c6f1-45de-b7e6-d49c493b7256/signal.json","dossier_json":"https://onlylabs.fyi/labs/sambanova/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/sambanova/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/sambanova/evidence.json","topic_signals_json":null,"topic_feed":null,"category_signals_json":"https://onlylabs.fyi/signals.json?category=neocloud","data_radar_json":null,"opportunities_json":null},"analysis_playbook":{"objective":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","evidence_focus":["repo name","owner","description","language","stars","source URL","first seen time","data, eval, infra, safety, and product terms"],"extraction_questions":["What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Is the repo new evidence for a lab direction that is not yet in writing or releases?","Which related signals should an analyst inspect next?"],"signal_questions":["What does this new repository reveal before a formal announcement exists?","What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Do the 6 related repo signals show a repeated pattern?"],"output_fields":["org","repo","technical_theme","evidence_url"],"data_business_relevance":"Data-business lane extraction is scoped to frontier labs; for this category, interpret the repository as source-grounded category strategy evidence.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/82c927c1-c6f1-45de-b7e6-d49c493b7256/signal.json","required":true},{"label":"source","url":"https://github.com/sambanova/generative_data_prep","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/sambanova/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/sambanova/evidence.json","required":true},{"label":"topic_signals_json","url":null,"required":false},{"label":"data_radar_json","url":null,"required":false}],"expected_output":["one-paragraph source-grounded interpretation","category-specific implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze SambaNova Systems's repo signal \"sambanova/generative_data_prep\" for neocloud strategy."},"semantic_triples":[{"subject":"SambaNova Systems","predicate":"published repo","object":"sambanova/generative_data_prep","text":"SambaNova Systems published repo sambanova/generative_data_prep."},{"subject":"sambanova/generative_data_prep","predicate":"is classified as","object":"repo signal","text":"sambanova/generative_data_prep is classified as repo signal."},{"subject":"sambanova/generative_data_prep","predicate":"belongs to","object":"repos desk","text":"sambanova/generative_data_prep belongs to repos desk."},{"subject":"sambanova/generative_data_prep","predicate":"has context","object":"Python","text":"sambanova/generative_data_prep has context Python."},{"subject":"sambanova/generative_data_prep","predicate":"has evidence coverage","object":"1 captured evidence page","text":"sambanova/generative_data_prep has evidence coverage 1 captured evidence page."},{"subject":"sambanova/generative_data_prep","predicate":"has captured page count","object":"1","text":"sambanova/generative_data_prep has captured page count 1."},{"subject":"sambanova/generative_data_prep","predicate":"has readable page count","object":"1","text":"sambanova/generative_data_prep has readable page count 1."},{"subject":"sambanova/generative_data_prep","predicate":"has related signal count","object":"6","text":"sambanova/generative_data_prep has related signal count 6."},{"subject":"sambanova/generative_data_prep","predicate":"has analysis playbook objective","object":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","text":"sambanova/generative_data_prep has analysis playbook objective Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.."},{"subject":"sambanova/generative_data_prep","predicate":"has source host","object":"github.com","text":"sambanova/generative_data_prep has source host github.com."},{"subject":"sambanova/generative_data_prep","predicate":"has lab","object":"SambaNova Systems","text":"sambanova/generative_data_prep has lab SambaNova Systems."},{"subject":"sambanova/generative_data_prep","predicate":"has signal desk","object":"repos","text":"sambanova/generative_data_prep has signal desk repos."},{"subject":"sambanova/generative_data_prep","predicate":"has source host","object":"github.com","text":"sambanova/generative_data_prep has source host github.com."},{"subject":"sambanova/generative_data_prep","predicate":"has repository","object":"sambanova/generative_data_prep","text":"sambanova/generative_data_prep has repository sambanova/generative_data_prep."},{"subject":"sambanova/generative_data_prep","predicate":"has language","object":"Python","text":"sambanova/generative_data_prep has language Python."},{"subject":"sambanova/generative_data_prep","predicate":"has stars","object":"67","text":"sambanova/generative_data_prep has stars 67."},{"subject":"sambanova/generative_data_prep","predicate":"has watch term","object":"Model card","text":"sambanova/generative_data_prep has watch term Model card."},{"subject":"sambanova/generative_data_prep","predicate":"has watch term","object":"Data pipeline","text":"sambanova/generative_data_prep has watch term Data pipeline."}]},"intelligence":{"signal_desk":"repos","answer":"SambaNova Systems published sambanova/generative_data_prep (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo sambanova/generative_data_prep · language Python. onlylabs links this event to 1 captured evidence page and 6 related repo signals.","semantic_triples":[{"subject":"SambaNova Systems","predicate":"published repo","object":"sambanova/generative_data_prep","text":"SambaNova Systems published repo sambanova/generative_data_prep."},{"subject":"sambanova/generative_data_prep","predicate":"is classified as","object":"repo signal","text":"sambanova/generative_data_prep is classified as repo signal."},{"subject":"sambanova/generative_data_prep","predicate":"belongs to","object":"repos desk","text":"sambanova/generative_data_prep belongs to repos desk."},{"subject":"sambanova/generative_data_prep","predicate":"has context","object":"Python","text":"sambanova/generative_data_prep has context Python."},{"subject":"sambanova/generative_data_prep","predicate":"has evidence coverage","object":"1 captured evidence page","text":"sambanova/generative_data_prep has evidence coverage 1 captured evidence page."}]},"signal":{"id":"82c927c1-c6f1-45de-b7e6-d49c493b7256","url":"https://onlylabs.fyi/signals/82c927c1-c6f1-45de-b7e6-d49c493b7256","json_url":"https://onlylabs.fyi/signals/82c927c1-c6f1-45de-b7e6-d49c493b7256/signal.json","source_url":"https://github.com/sambanova/generative_data_prep","title":"sambanova/generative_data_prep","summary":"SambaNova Systems published a new repository. onlylabs watches repos for tooling, eval, infra, and model-adjacent work.","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"sambanova","name":"SambaNova Systems","category":"neocloud"},"occurred_at":"2023-03-28T02:04:40+00:00","first_seen_at":"2026-06-05T22:32:07.650067+00:00","date_source":"source","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/sambanova/generative_data_prep"]},"facets":{"repo":"sambanova/generative_data_prep","language":"Python"},"traction":{"github_stars":67,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":null},"primary_evidence_page":{"url":"https://github.com/sambanova/generative_data_prep","final_url":"https://github.com/sambanova/generative_data_prep","title":"sambanova/generative_data_prep repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T04:10:35.230398+00:00","bytes":49487,"raw_path":"076cf435ef48b40678b40617a337057a8a303d65c89788ea44f595edaf09bc87.json","content_hash":"f673ff23d25496c1032cc351a5c9d0d11a95e395871beaf7377df7d9cd920d32","excerpt_chars":1200,"truncated":true,"excerpt":"sambanova/generative_data_prep Language: Python License: Apache-2.0 Stars: 67 Forks: 10 Open issues: 7 Created: 2023-03-28T02:04:40Z Pushed: 2026-02-04T19:00:26Z Default branch: main Fork: no Archived: no README: [![CircleCI](https://dl.circleci.com/status-badge/img/gh/sambanova/generative_data_prep/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/sambanova/generative_data_prep/tree/main) [![codecov](https://codecov.io/gh/sambanova/generative_data_prep/graph/badge.svg?token=9CYRCUOOAO)](https://codecov.io/gh/sambanova/generative_data_prep) [![Python](https://img.shields.io/badge/python-%3E=3.7-blue.svg)](https://www.python.org/) [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![flake8](https://img.shields.io/badge/pep8-flake8-blue.svg)](https://github.com/PyCQA/flake8) [![bandit](https://img.shields.io/badge/security-bandit-yellow.svg)](https://github.com/PyCQA/bandit)..."},"evidence_pages":[{"url":"https://github.com/sambanova/generative_data_prep","final_url":"https://github.com/sambanova/generative_data_prep","title":"sambanova/generative_data_prep repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T04:10:35.230398+00:00","bytes":49487,"raw_path":"076cf435ef48b40678b40617a337057a8a303d65c89788ea44f595edaf09bc87.json","content_hash":"f673ff23d25496c1032cc351a5c9d0d11a95e395871beaf7377df7d9cd920d32","excerpt_chars":1200,"truncated":true,"excerpt":"sambanova/generative_data_prep Language: Python License: Apache-2.0 Stars: 67 Forks: 10 Open issues: 7 Created: 2023-03-28T02:04:40Z Pushed: 2026-02-04T19:00:26Z Default branch: main Fork: no Archived: no README: [![CircleCI](https://dl.circleci.com/status-badge/img/gh/sambanova/generative_data_prep/tree/main.svg?style=svg)](https://dl.circleci.com/status-badge/redirect/gh/sambanova/generative_data_prep/tree/main) [![codecov](https://codecov.io/gh/sambanova/generative_data_prep/graph/badge.svg?token=9CYRCUOOAO)](https://codecov.io/gh/sambanova/generative_data_prep) [![Python](https://img.shields.io/badge/python-%3E=3.7-blue.svg)](https://www.python.org/) [![pre-commit](https://img.shields.io/badge/pre--commit-enabled-brightgreen?logo=pre-commit)](https://github.com/pre-commit/pre-commit) [![Code style: black](https://img.shields.io/badge/code%20style-black-000000.svg)](https://github.com/psf/black) [![flake8](https://img.shields.io/badge/pep8-flake8-blue.svg)](https://github.com/PyCQA/flake8) [![bandit](https://img.shields.io/badge/security-bandit-yellow.svg)](https://github.com/PyCQA/bandit)..."}],"related_signals":[{"id":"93001bee-6577-419c-b4d6-f05a9a791774","url":"https://onlylabs.fyi/signals/93001bee-6577-419c-b4d6-f05a9a791774","source_url":"https://github.com/sambanova/sambanova-plugin-cc","title":"sambanova/sambanova-plugin-cc","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"sambanova","name":"SambaNova Systems","category":"neocloud"},"occurred_at":"2026-03-31T17:38:58+00:00","first_seen_at":"2026-06-10T07:01:00.380184+00:00","date_source":"source"},{"id":"6f5c40bb-23c0-4fb3-b969-cd366373999b","url":"https://onlylabs.fyi/signals/6f5c40bb-23c0-4fb3-b969-cd366373999b","source_url":"https://github.com/sambanova/sambastack-tools","title":"sambanova/sambastack-tools","context":"TypeScript","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"sambanova","name":"SambaNova Systems","category":"neocloud"},"occurred_at":"2026-01-15T17:22:22+00:00","first_seen_at":"2026-06-05T22:32:07.650067+00:00","date_source":"source"},{"id":"9e12de07-6b5a-44b3-ab49-b0c337bd3cbe","url":"https://onlylabs.fyi/signals/9e12de07-6b5a-44b3-ab49-b0c337bd3cbe","source_url":"https://github.com/sambanova/sambastack-version","title":"sambanova/sambastack-version","context":null,"kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"sambanova","name":"SambaNova Systems","category":"neocloud"},"occurred_at":"2025-12-31T05:55:13+00:00","first_seen_at":"2026-06-05T22:32:07.650067+00:00","date_source":"source"},{"id":"6dda1c8d-3d77-4018-815e-fa01a06c0fd8","url":"https://onlylabs.fyi/signals/6dda1c8d-3d77-4018-815e-fa01a06c0fd8","source_url":"https://github.com/sambanova/sambanova-typescript","title":"sambanova/sambanova-typescript","context":"TypeScript","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"sambanova","name":"SambaNova Systems","category":"neocloud"},"occurred_at":"2025-08-20T22:17:13+00:00","first_seen_at":"2026-06-05T22:32:07.650067+00:00","date_source":"source"},{"id":"689d13f9-2225-4cae-b806-ef04ae3036e3","url":"https://onlylabs.fyi/signals/689d13f9-2225-4cae-b806-ef04ae3036e3","source_url":"https://github.com/sambanova/sambanova-python","title":"sambanova/sambanova-python","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"sambanova","name":"SambaNova Systems","category":"neocloud"},"occurred_at":"2025-08-20T22:16:11+00:00","first_seen_at":"2026-06-05T22:32:07.650067+00:00","date_source":"source"},{"id":"1d14d9ba-ff4b-4e9a-b24f-8f07236daf5b","url":"https://onlylabs.fyi/signals/1d14d9ba-ff4b-4e9a-b24f-8f07236daf5b","source_url":"https://github.com/sambanova/tokenizers","title":"sambanova/tokenizers","context":null,"kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"sambanova","name":"SambaNova Systems","category":"neocloud"},"occurred_at":"2025-06-24T19:56:26+00:00","first_seen_at":"2026-06-05T22:32:07.650067+00:00","date_source":"source"}]}