{"schema_version":"onlylabs.public_signal.v1","title":"CoreWeave Repo: coreweave/dataset-downloader","description":"CoreWeave repo signal with public source context, captured evidence pages, related signals, and category-scoped analysis context.","url":"https://onlylabs.fyi/signals/aee19cfe-df13-4380-a32f-cc5e03f7a4a2","json_url":"https://onlylabs.fyi/signals/aee19cfe-df13-4380-a32f-cc5e03f7a4a2/signal.json","generated_at":"2026-06-11T04:10:42.163337+00:00","org":{"slug":"coreweave","name":"CoreWeave","category":"neocloud","category_label":"Neocloud","dossier_url":"https://onlylabs.fyi/labs/coreweave","dossier_json_url":"https://onlylabs.fyi/labs/coreweave/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/aee19cfe-df13-4380-a32f-cc5e03f7a4a2","signal_json":"https://onlylabs.fyi/signals/aee19cfe-df13-4380-a32f-cc5e03f7a4a2/signal.json","source":"https://github.com/coreweave/dataset-downloader","lab_dossier":"https://onlylabs.fyi/labs/coreweave","lab_dossier_json":"https://onlylabs.fyi/labs/coreweave/dossier.json","analysis":"https://onlylabs.fyi/analysis/coreweave","analysis_json":"https://onlylabs.fyi/analysis/coreweave/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/coreweave/evidence.json","category":"https://onlylabs.fyi/neoclouds","category_json":"https://onlylabs.fyi/neoclouds.json","category_feed":"https://onlylabs.fyi/neoclouds/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json?category=neocloud","topic":null,"topic_signals_json":null,"topic_feed":null,"data_business":null},"answer_pack":{"answer":"CoreWeave published coreweave/dataset-downloader (Go). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo coreweave/dataset-downloader · language Go. onlylabs links this event to 1 captured evidence page and 6 related repo signals.","signal_desk":"repos","source_context":{"source_url":"https://github.com/coreweave/dataset-downloader","source_host":"github.com","occurred_at":"2022-12-05T19:30:17+00:00","first_seen_at":"2026-06-05T05:43:02.359092+00:00","date_source":"source","context":"Go"},"context_markers":[{"label":"Lab","value":"CoreWeave","source":"signal"},{"label":"Signal desk","value":"repos","source":"signal"},{"label":"Source host","value":"github.com","source":"source"},{"label":"Repository","value":"coreweave/dataset-downloader","source":"source"},{"label":"Language","value":"Go","source":"source"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/coreweave/dataset-downloader"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-11T04:10:42.163337+00:00"},"data_business":{"matches":false,"lanes":[],"matched_terms":[],"score":null,"reason":null},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/aee19cfe-df13-4380-a32f-cc5e03f7a4a2/signal.json","dossier_json":"https://onlylabs.fyi/labs/coreweave/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/coreweave/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/coreweave/evidence.json","topic_signals_json":null,"topic_feed":null,"category_signals_json":"https://onlylabs.fyi/signals.json?category=neocloud","data_radar_json":null,"opportunities_json":null},"analysis_playbook":{"objective":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","evidence_focus":["repo name","owner","description","language","stars","source URL","first seen time","data, eval, infra, safety, and product terms"],"extraction_questions":["What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Is the repo new evidence for a lab direction that is not yet in writing or releases?","Which related signals should an analyst inspect next?"],"signal_questions":["What does this new repository reveal before a formal announcement exists?","What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Do the 6 related repo signals show a repeated pattern?"],"output_fields":["org","repo","technical_theme","evidence_url"],"data_business_relevance":"Data-business lane extraction is scoped to frontier labs; for this category, interpret the repository as source-grounded category strategy evidence.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/aee19cfe-df13-4380-a32f-cc5e03f7a4a2/signal.json","required":true},{"label":"source","url":"https://github.com/coreweave/dataset-downloader","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/coreweave/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/coreweave/evidence.json","required":true},{"label":"topic_signals_json","url":null,"required":false},{"label":"data_radar_json","url":null,"required":false}],"expected_output":["one-paragraph source-grounded interpretation","category-specific implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze CoreWeave's repo signal \"coreweave/dataset-downloader\" for neocloud strategy."},"semantic_triples":[{"subject":"CoreWeave","predicate":"published repo","object":"coreweave/dataset-downloader","text":"CoreWeave published repo coreweave/dataset-downloader."},{"subject":"coreweave/dataset-downloader","predicate":"is classified as","object":"repo signal","text":"coreweave/dataset-downloader is classified as repo signal."},{"subject":"coreweave/dataset-downloader","predicate":"belongs to","object":"repos desk","text":"coreweave/dataset-downloader belongs to repos desk."},{"subject":"coreweave/dataset-downloader","predicate":"has context","object":"Go","text":"coreweave/dataset-downloader has context Go."},{"subject":"coreweave/dataset-downloader","predicate":"has evidence coverage","object":"1 captured evidence page","text":"coreweave/dataset-downloader has evidence coverage 1 captured evidence page."},{"subject":"coreweave/dataset-downloader","predicate":"has captured page count","object":"1","text":"coreweave/dataset-downloader has captured page count 1."},{"subject":"coreweave/dataset-downloader","predicate":"has readable page count","object":"1","text":"coreweave/dataset-downloader has readable page count 1."},{"subject":"coreweave/dataset-downloader","predicate":"has related signal count","object":"6","text":"coreweave/dataset-downloader has related signal count 6."},{"subject":"coreweave/dataset-downloader","predicate":"has analysis playbook objective","object":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","text":"coreweave/dataset-downloader has analysis playbook objective Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.."},{"subject":"coreweave/dataset-downloader","predicate":"has source host","object":"github.com","text":"coreweave/dataset-downloader has source host github.com."},{"subject":"coreweave/dataset-downloader","predicate":"has lab","object":"CoreWeave","text":"coreweave/dataset-downloader has lab CoreWeave."},{"subject":"coreweave/dataset-downloader","predicate":"has signal desk","object":"repos","text":"coreweave/dataset-downloader has signal desk repos."},{"subject":"coreweave/dataset-downloader","predicate":"has source host","object":"github.com","text":"coreweave/dataset-downloader has source host github.com."},{"subject":"coreweave/dataset-downloader","predicate":"has repository","object":"coreweave/dataset-downloader","text":"coreweave/dataset-downloader has repository coreweave/dataset-downloader."},{"subject":"coreweave/dataset-downloader","predicate":"has language","object":"Go","text":"coreweave/dataset-downloader has language Go."},{"subject":"coreweave/dataset-downloader","predicate":"has watch term","object":"Data pipeline","text":"coreweave/dataset-downloader has watch term Data pipeline."},{"subject":"coreweave/dataset-downloader","predicate":"has watch term","object":"Infrastructure","text":"coreweave/dataset-downloader has watch term Infrastructure."}]},"intelligence":{"signal_desk":"repos","answer":"CoreWeave published coreweave/dataset-downloader (Go). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo coreweave/dataset-downloader · language Go. onlylabs links this event to 1 captured evidence page and 6 related repo signals.","semantic_triples":[{"subject":"CoreWeave","predicate":"published repo","object":"coreweave/dataset-downloader","text":"CoreWeave published repo coreweave/dataset-downloader."},{"subject":"coreweave/dataset-downloader","predicate":"is classified as","object":"repo signal","text":"coreweave/dataset-downloader is classified as repo signal."},{"subject":"coreweave/dataset-downloader","predicate":"belongs to","object":"repos desk","text":"coreweave/dataset-downloader belongs to repos desk."},{"subject":"coreweave/dataset-downloader","predicate":"has context","object":"Go","text":"coreweave/dataset-downloader has context Go."},{"subject":"coreweave/dataset-downloader","predicate":"has evidence coverage","object":"1 captured evidence page","text":"coreweave/dataset-downloader has evidence coverage 1 captured evidence page."}]},"signal":{"id":"aee19cfe-df13-4380-a32f-cc5e03f7a4a2","url":"https://onlylabs.fyi/signals/aee19cfe-df13-4380-a32f-cc5e03f7a4a2","json_url":"https://onlylabs.fyi/signals/aee19cfe-df13-4380-a32f-cc5e03f7a4a2/signal.json","source_url":"https://github.com/coreweave/dataset-downloader","title":"coreweave/dataset-downloader","summary":"CoreWeave published a new repository. onlylabs watches repos for tooling, eval, infra, and model-adjacent work.","context":"Go","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"coreweave","name":"CoreWeave","category":"neocloud"},"occurred_at":"2022-12-05T19:30:17+00:00","first_seen_at":"2026-06-05T05:43:02.359092+00:00","date_source":"source","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/coreweave/dataset-downloader"]},"facets":{"repo":"coreweave/dataset-downloader","language":"Go"},"traction":{"github_stars":0,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":null},"primary_evidence_page":{"url":"https://github.com/coreweave/dataset-downloader","final_url":"https://github.com/coreweave/dataset-downloader","title":"coreweave/dataset-downloader repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T04:10:42.163337+00:00","bytes":10084,"raw_path":"b3c597c575ec6c7bf81e38eea92d22c3a6bb4e13e180b75daf80fb8feb35d06b.json","content_hash":"c4c0a67628f13a8c28e69a302159585adb75caa2bdaf19b56e2f6594af3260bf","excerpt_chars":1200,"truncated":true,"excerpt":"coreweave/dataset-downloader Language: Go License: MIT Stars: 0 Forks: 2 Open issues: 2 Created: 2022-12-05T19:30:17Z Pushed: 2026-04-29T19:19:31Z Default branch: main Fork: no Archived: no README: dataset-downloader <!-- omit in toc --> Contains code that build into docker images that can be used to download datasets for training machine learning models. Contents: - [smashwords-downloader](#smashwords-downloader) smashwords-downloader This script downloads plain text files of Western Romance books publicaly avaible on [Smashworks](https://www.smashwords.com/). This website has been used to create popular Machine Learning datasets like [BookCorpus](https://huggingface.co/datasets/bookcorpus). The source code located in `cmd/smashwords-downloader`. It can be built into an executable with the command `go build -o main *.go`. The `main.go` script takes the following arugments: ``` -data_dir string directory that the book files will download to (default \"./data\") -id integer The cooresponding ID for the smashswords url you want to scrape https://www.smashwords.com/books/category/1105/downloads/0/free would have an ID of 1105 (default is 1245 == western romance) -pageitems integer The..."},"evidence_pages":[{"url":"https://github.com/coreweave/dataset-downloader","final_url":"https://github.com/coreweave/dataset-downloader","title":"coreweave/dataset-downloader repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T04:10:42.163337+00:00","bytes":10084,"raw_path":"b3c597c575ec6c7bf81e38eea92d22c3a6bb4e13e180b75daf80fb8feb35d06b.json","content_hash":"c4c0a67628f13a8c28e69a302159585adb75caa2bdaf19b56e2f6594af3260bf","excerpt_chars":1200,"truncated":true,"excerpt":"coreweave/dataset-downloader Language: Go License: MIT Stars: 0 Forks: 2 Open issues: 2 Created: 2022-12-05T19:30:17Z Pushed: 2026-04-29T19:19:31Z Default branch: main Fork: no Archived: no README: dataset-downloader <!-- omit in toc --> Contains code that build into docker images that can be used to download datasets for training machine learning models. Contents: - [smashwords-downloader](#smashwords-downloader) smashwords-downloader This script downloads plain text files of Western Romance books publicaly avaible on [Smashworks](https://www.smashwords.com/). This website has been used to create popular Machine Learning datasets like [BookCorpus](https://huggingface.co/datasets/bookcorpus). The source code located in `cmd/smashwords-downloader`. It can be built into an executable with the command `go build -o main *.go`. The `main.go` script takes the following arugments: ``` -data_dir string directory that the book files will download to (default \"./data\") -id integer The cooresponding ID for the smashswords url you want to scrape https://www.smashwords.com/books/category/1105/downloads/0/free would have an ID of 1105 (default is 1245 == western romance) -pageitems integer The..."}],"related_signals":[{"id":"422ade4e-b6f6-4ef8-954b-6437047b1709","url":"https://onlylabs.fyi/signals/422ade4e-b6f6-4ef8-954b-6437047b1709","source_url":"https://github.com/coreweave/cwsandbox-client","title":"coreweave/cwsandbox-client","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"coreweave","name":"CoreWeave","category":"neocloud"},"occurred_at":"2025-12-15T22:40:36+00:00","first_seen_at":"2026-06-05T05:43:02.359092+00:00","date_source":"source"},{"id":"88a0f25a-16fa-49b7-a51d-84c60d8f0a14","url":"https://onlylabs.fyi/signals/88a0f25a-16fa-49b7-a51d-84c60d8f0a14","source_url":"https://github.com/coreweave/cwic","title":"coreweave/cwic","context":null,"kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"coreweave","name":"CoreWeave","category":"neocloud"},"occurred_at":"2025-10-10T17:23:31+00:00","first_seen_at":"2026-06-05T05:43:02.359092+00:00","date_source":"source"},{"id":"71bbfc16-e5cf-43a5-8874-5c7f3f6846bc","url":"https://onlylabs.fyi/signals/71bbfc16-e5cf-43a5-8874-5c7f3f6846bc","source_url":"https://github.com/coreweave/actions-public","title":"coreweave/actions-public","context":"JavaScript","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"coreweave","name":"CoreWeave","category":"neocloud"},"occurred_at":"2025-04-23T18:33:02+00:00","first_seen_at":"2026-06-05T05:43:02.359092+00:00","date_source":"source"},{"id":"d7f3556b-5171-40bb-8486-447bf8050fe3","url":"https://onlylabs.fyi/signals/d7f3556b-5171-40bb-8486-447bf8050fe3","source_url":"https://github.com/coreweave/reference-architecture","title":"coreweave/reference-architecture","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"coreweave","name":"CoreWeave","category":"neocloud"},"occurred_at":"2025-04-14T14:33:44+00:00","first_seen_at":"2026-06-05T05:43:02.359092+00:00","date_source":"source"},{"id":"3b73a958-d528-40d5-b0ac-c33fb80cf74f","url":"https://onlylabs.fyi/signals/3b73a958-d528-40d5-b0ac-c33fb80cf74f","source_url":"https://github.com/coreweave/slurm-containers","title":"coreweave/slurm-containers","context":"Dockerfile","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"coreweave","name":"CoreWeave","category":"neocloud"},"occurred_at":"2025-03-12T20:12:00+00:00","first_seen_at":"2026-06-05T05:43:02.359092+00:00","date_source":"source"},{"id":"35194cb7-5cd9-469b-a8b6-076dacfedbe9","url":"https://onlylabs.fyi/signals/35194cb7-5cd9-469b-a8b6-076dacfedbe9","source_url":"https://github.com/coreweave/ioperftest","title":"coreweave/ioperftest","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"coreweave","name":"CoreWeave","category":"neocloud"},"occurred_at":"2025-01-31T14:50:59+00:00","first_seen_at":"2026-06-05T05:43:02.359092+00:00","date_source":"source"}]}