{"schema_version":"onlylabs.public_signal.v1","title":"Scaleway Writing: Distributed ML model inference","description":"Scaleway writing signal with public source context, captured evidence pages, related signals, and category-scoped analysis context.","url":"https://onlylabs.fyi/signals/d7dc8b5e-9b25-43b9-aa32-9eb9c5e011e6","json_url":"https://onlylabs.fyi/signals/d7dc8b5e-9b25-43b9-aa32-9eb9c5e011e6/signal.json","generated_at":"2026-06-08T15:45:56.07+00:00","org":{"slug":"scaleway","name":"Scaleway","category":"neocloud","category_label":"Neocloud","dossier_url":"https://onlylabs.fyi/labs/scaleway","dossier_json_url":"https://onlylabs.fyi/labs/scaleway/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/d7dc8b5e-9b25-43b9-aa32-9eb9c5e011e6","signal_json":"https://onlylabs.fyi/signals/d7dc8b5e-9b25-43b9-aa32-9eb9c5e011e6/signal.json","source":"https://www.scaleway.com/en/blog/distributed-ml-model-inference/","lab_dossier":"https://onlylabs.fyi/labs/scaleway","lab_dossier_json":"https://onlylabs.fyi/labs/scaleway/dossier.json","analysis":"https://onlylabs.fyi/analysis/scaleway","analysis_json":"https://onlylabs.fyi/analysis/scaleway/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/scaleway/evidence.json","category":"https://onlylabs.fyi/neoclouds","category_json":"https://onlylabs.fyi/neoclouds.json","category_feed":"https://onlylabs.fyi/neoclouds/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json?category=neocloud","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json?category=neocloud","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml?category=neocloud","data_business":null},"answer_pack":{"answer":"Scaleway published Distributed ML model inference. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Routine cloud provider blog post · [2209.01188] Petals: Collaborative Inference and Fine-tuning of Large Models Petals: Collaborative Inference and Fine-tuning of Large Models Alexander Borzunov HSE.... onlylabs links this event to 2 captured evidence pages and 6 related writing signals.","signal_desk":"talking","source_context":{"source_url":"https://www.scaleway.com/en/blog/distributed-ml-model-inference/","source_host":"scaleway.com","occurred_at":"2024-12-19T00:00:00+00:00","first_seen_at":"2026-06-05T22:32:15.955798+00:00","date_source":"source","context":null},"context_markers":[{"label":"Lab","value":"Scaleway","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"scaleway.com","source":"source"},{"label":"Author","value":"Valentin Macheret","source":"source"},{"label":"PDF","value":"linked report","source":"source"},{"label":"Notability","value":"Routine cloud provider blog post","source":"signal"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Model card","source":"model"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Safety and alignment","source":"evidence"}],"evidence_coverage":{"target_pages":2,"captured_pages":2,"readable_pages":2,"capture_methods":["exa","plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://www.scaleway.com/en/blog/distributed-ml-model-inference/","https://arxiv.org/pdf/2209.01188.pdf"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-08T15:45:56.07+00:00"},"data_business":{"matches":false,"lanes":[],"matched_terms":[],"score":null,"reason":null},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/d7dc8b5e-9b25-43b9-aa32-9eb9c5e011e6/signal.json","dossier_json":"https://onlylabs.fyi/labs/scaleway/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/scaleway/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/scaleway/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json?category=neocloud","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml?category=neocloud","category_signals_json":"https://onlylabs.fyi/signals.json?category=neocloud","data_radar_json":null,"opportunities_json":null},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","evidence_url"],"data_business_relevance":"Data-business lane extraction is scoped to frontier labs; for this category, keep conclusions tied to category-specific strategy, source evidence, and follow-up questions.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/d7dc8b5e-9b25-43b9-aa32-9eb9c5e011e6/signal.json","required":true},{"label":"source","url":"https://www.scaleway.com/en/blog/distributed-ml-model-inference/","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/scaleway/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/scaleway/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json?category=neocloud","required":false},{"label":"data_radar_json","url":null,"required":false}],"expected_output":["one-paragraph source-grounded interpretation","category-specific implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze Scaleway's writing signal \"Distributed ML model inference\" for neocloud strategy."},"semantic_triples":[{"subject":"Scaleway","predicate":"published","object":"Distributed ML model inference","text":"Scaleway published Distributed ML model inference."},{"subject":"Distributed ML model inference","predicate":"is classified as","object":"writing signal","text":"Distributed ML model inference is classified as writing signal."},{"subject":"Distributed ML model inference","predicate":"belongs to","object":"talking desk","text":"Distributed ML model inference belongs to talking desk."},{"subject":"Distributed ML model inference","predicate":"has evidence coverage","object":"2 captured evidence pages","text":"Distributed ML model inference has evidence coverage 2 captured evidence pages."},{"subject":"Distributed ML model inference","predicate":"has captured page count","object":"2","text":"Distributed ML model inference has captured page count 2."},{"subject":"Distributed ML model inference","predicate":"has readable page count","object":"2","text":"Distributed ML model inference has readable page count 2."},{"subject":"Distributed ML model inference","predicate":"has related signal count","object":"6","text":"Distributed ML model inference has related signal count 6."},{"subject":"Distributed ML model inference","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"Distributed ML model inference has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.."},{"subject":"Distributed ML model inference","predicate":"has source host","object":"scaleway.com","text":"Distributed ML model inference has source host scaleway.com."},{"subject":"Distributed ML model inference","predicate":"has lab","object":"Scaleway","text":"Distributed ML model inference has lab Scaleway."},{"subject":"Distributed ML model inference","predicate":"has signal desk","object":"talking","text":"Distributed ML model inference has signal desk talking."},{"subject":"Distributed ML model inference","predicate":"has source host","object":"scaleway.com","text":"Distributed ML model inference has source host scaleway.com."},{"subject":"Distributed ML model inference","predicate":"has author","object":"Valentin Macheret","text":"Distributed ML model inference has author Valentin Macheret."},{"subject":"Distributed ML model inference","predicate":"has pdf","object":"linked report","text":"Distributed ML model inference has pdf linked report."},{"subject":"Distributed ML model inference","predicate":"has notability","object":"Routine cloud provider blog post","text":"Distributed ML model inference has notability Routine cloud provider blog post."},{"subject":"Distributed ML model inference","predicate":"has watch term","object":"Eval methodology","text":"Distributed ML model inference has watch term Eval methodology."},{"subject":"Distributed ML model inference","predicate":"has watch term","object":"Model card","text":"Distributed ML model inference has watch term Model card."}]},"intelligence":{"signal_desk":"talking","answer":"Scaleway published Distributed ML model inference. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Routine cloud provider blog post · [2209.01188] Petals: Collaborative Inference and Fine-tuning of Large Models Petals: Collaborative Inference and Fine-tuning of Large Models Alexander Borzunov HSE.... onlylabs links this event to 2 captured evidence pages and 6 related writing signals.","semantic_triples":[{"subject":"Scaleway","predicate":"published","object":"Distributed ML model inference","text":"Scaleway published Distributed ML model inference."},{"subject":"Distributed ML model inference","predicate":"is classified as","object":"writing signal","text":"Distributed ML model inference is classified as writing signal."},{"subject":"Distributed ML model inference","predicate":"belongs to","object":"talking desk","text":"Distributed ML model inference belongs to talking desk."},{"subject":"Distributed ML model inference","predicate":"has evidence coverage","object":"2 captured evidence pages","text":"Distributed ML model inference has evidence coverage 2 captured evidence pages."}]},"signal":{"id":"d7dc8b5e-9b25-43b9-aa32-9eb9c5e011e6","url":"https://onlylabs.fyi/signals/d7dc8b5e-9b25-43b9-aa32-9eb9c5e011e6","json_url":"https://onlylabs.fyi/signals/d7dc8b5e-9b25-43b9-aa32-9eb9c5e011e6/signal.json","source_url":"https://www.scaleway.com/en/blog/distributed-ml-model-inference/","title":"Distributed ML model inference","summary":"Scaleway published a writing signal. onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"scaleway","name":"Scaleway","category":"neocloud"},"occurred_at":"2024-12-19T00:00:00+00:00","first_seen_at":"2026-06-05T22:32:15.955798+00:00","date_source":"source","evidence_coverage":{"target_pages":2,"captured_pages":2,"readable_pages":2,"capture_methods":["exa","plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://www.scaleway.com/en/blog/distributed-ml-model-inference/","https://arxiv.org/pdf/2209.01188.pdf"]},"facets":{},"traction":{"github_stars":null,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":null},"primary_evidence_page":{"url":"https://www.scaleway.com/en/blog/distributed-ml-model-inference/","final_url":"https://www.scaleway.com/en/blog/distributed-ml-model-inference/","title":"Distributed ML model inference","http_status":200,"content_type":"text/html; charset=utf-8","capture_method":"plain","fetched_at":"2026-06-07T21:16:15.276395+00:00","bytes":162792,"raw_path":"f4ddb7d2347b15216b5ec85c69c98976e88e206d5fa307bcb7a171c67d5ccf2f.html","content_hash":"4285d3ca76821261e18a7a165f1df1a0dfda6ad1dae07c0d46e3164695b31a2f","excerpt_chars":1200,"truncated":true,"excerpt":"Distributed ML model inference Deploy • Valentin Macheret • 19/12/24 • 7 min read In the state of 2024, some Large Language Models (LLM) are made of hundreds of billions of parameters. To run them you need GPUs, big GPUs. With BLOOM-176 or OPT-175 you will broadly need 3 Nvidia A100, costing $15K each. A paper published in March 2023 introduces Petals , a framework for collaborative inference (ie: process a real user&#x27;s request). It concludes that the bill can be drastically reduced. Let&#x27;s see how: we first introduce how training actually works then inference for a big model, then explain how Petals improved that. We’ll conclude by system limitations. Distributed training Distributed Machine Learning is required to achieve high performance in training large models based on very large dataset (about terabytes of data). It globally implies to train the model across multiple instances (that can host one or more GPUs), rather than on a single instance. The data is split across the instances, and each of them trains the model on its portion of the data. All resulting models are then combined to produce a final model. This approach can significantly reduce the time it takes to..."},"evidence_pages":[{"url":"https://arxiv.org/pdf/2209.01188.pdf","final_url":"https://arxiv.org/pdf/2209.01188","title":"Distributed ML model inference","http_status":200,"content_type":"application/pdf","capture_method":"exa","fetched_at":"2026-06-08T15:45:56.07+00:00","bytes":686580,"raw_path":"85d97767bfe93ca9e8e99d1b0ecec7ab30434dd6e908ca73f3681c73675fa494.pdf","content_hash":"e60c77b665f326e9a813deb4becef83aa8134187cb059c1ce77b189c0795db9e","excerpt_chars":1200,"truncated":true,"excerpt":"[2209.01188] Petals: Collaborative Inference and Fine-tuning of Large Models Petals: Collaborative Inference and Fine-tuning of Large Models Alexander Borzunov HSE University, Yandex &Dmitry Baranchuk∗ Yandex &Tim Dettmers∗ University of Washington \\ANDMax Ryabinin∗ HSE University, Yandex &Younes Belkada∗ Hugging Face, ENS Paris-Saclay &Artem Chumachenko Yandex \\ANDPavel Samygin Yandex School of Data Analysis &Colin Raffel Hugging Face Equal contribution. Correspondence to: borzunov.alexander@gmail.com Abstract Many NLP tasks benefit from using large language models (LLMs) that often have more than 100 billion parameters. With the release of BLOOM-176B and OPT-175B, everyone can download pretrained models of this scale. Still, using these models requires high-end hardware unavailable to many researchers. In some cases, LLMs can be used more affordably via RAM offloading or hosted APIs. However, these techniques have innate limitations: offloading is too slow for interactive inference, while APIs are not flexible enough for research that requires access to weights, attention or logits. In this work, we propose Petals111Petals source code and documentation are available at..."},{"url":"https://www.scaleway.com/en/blog/distributed-ml-model-inference/","final_url":"https://www.scaleway.com/en/blog/distributed-ml-model-inference/","title":"Distributed ML model inference","http_status":200,"content_type":"text/html; charset=utf-8","capture_method":"plain","fetched_at":"2026-06-07T21:16:15.276395+00:00","bytes":162792,"raw_path":"f4ddb7d2347b15216b5ec85c69c98976e88e206d5fa307bcb7a171c67d5ccf2f.html","content_hash":"4285d3ca76821261e18a7a165f1df1a0dfda6ad1dae07c0d46e3164695b31a2f","excerpt_chars":1200,"truncated":true,"excerpt":"Distributed ML model inference Deploy • Valentin Macheret • 19/12/24 • 7 min read In the state of 2024, some Large Language Models (LLM) are made of hundreds of billions of parameters. To run them you need GPUs, big GPUs. With BLOOM-176 or OPT-175 you will broadly need 3 Nvidia A100, costing $15K each. A paper published in March 2023 introduces Petals , a framework for collaborative inference (ie: process a real user&#x27;s request). It concludes that the bill can be drastically reduced. Let&#x27;s see how: we first introduce how training actually works then inference for a big model, then explain how Petals improved that. We’ll conclude by system limitations. Distributed training Distributed Machine Learning is required to achieve high performance in training large models based on very large dataset (about terabytes of data). It globally implies to train the model across multiple instances (that can host one or more GPUs), rather than on a single instance. The data is split across the instances, and each of them trains the model on its portion of the data. All resulting models are then combined to produce a final model. This approach can significantly reduce the time it takes to..."}],"related_signals":[{"id":"b0ad8b34-33f4-455d-b0bc-9c40f34e08e5","url":"https://onlylabs.fyi/signals/b0ad8b34-33f4-455d-b0bc-9c40f34e08e5","source_url":"https://www.scaleway.com/en/blog/secnumcloud-strategic-challenges/","title":"SecNumCloud: The Strategic Challenges of The Qualification","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"scaleway","name":"Scaleway","category":"neocloud"},"occurred_at":"2026-05-18T00:00:00+00:00","first_seen_at":"2026-06-05T22:32:15.955798+00:00","date_source":"source"},{"id":"1ebc5528-9bd0-4245-8610-f28f9e3c5701","url":"https://onlylabs.fyi/signals/1ebc5528-9bd0-4245-8610-f28f9e3c5701","source_url":"https://www.scaleway.com/en/blog/what-is-file-storage/","title":"What is File Storage ?","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"scaleway","name":"Scaleway","category":"neocloud"},"occurred_at":"2026-05-11T00:00:00+00:00","first_seen_at":"2026-06-05T22:32:15.955798+00:00","date_source":"source"},{"id":"25d2fe5f-0386-4230-88d5-a8441b913ea5","url":"https://onlylabs.fyi/signals/25d2fe5f-0386-4230-88d5-a8441b913ea5","source_url":"https://www.scaleway.com/en/blog/secnumcloud-trusted-cloud-standard/","title":"SecNumCloud: Understanding the trusted cloud standard","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"scaleway","name":"Scaleway","category":"neocloud"},"occurred_at":"2026-05-11T00:00:00+00:00","first_seen_at":"2026-06-05T22:32:15.955798+00:00","date_source":"source"},{"id":"5ab5596e-ffbc-4a9e-84f9-cb54758bd5ff","url":"https://onlylabs.fyi/signals/5ab5596e-ffbc-4a9e-84f9-cb54758bd5ff","source_url":"https://www.scaleway.com/en/blog/risc-v-servers-in-the-cloud/","title":"How Scaleway brought the first RISC-V servers to the cloud","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"scaleway","name":"Scaleway","category":"neocloud"},"occurred_at":"2026-05-04T00:00:00+00:00","first_seen_at":"2026-06-05T22:32:15.955798+00:00","date_source":"source"},{"id":"868f0aa0-2b52-47bf-afa3-5b656454f763","url":"https://onlylabs.fyi/signals/868f0aa0-2b52-47bf-afa3-5b656454f763","source_url":"https://www.scaleway.com/en/blog/updates-on-top-level-domain-tld-price-list/","title":"Updates on Top-Level Domain (TLD) price list","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"scaleway","name":"Scaleway","category":"neocloud"},"occurred_at":"2026-04-27T00:00:00+00:00","first_seen_at":"2026-06-05T22:32:15.955798+00:00","date_source":"source"},{"id":"ee327ff2-954d-407d-8d37-447f25d5ddf5","url":"https://onlylabs.fyi/signals/ee327ff2-954d-407d-8d37-447f25d5ddf5","source_url":"https://www.scaleway.com/en/blog/a-transparent-update-on-scaleway-pricing/","title":"A transparent update on Scaleway pricing","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"scaleway","name":"Scaleway","category":"neocloud"},"occurred_at":"2026-04-27T00:00:00+00:00","first_seen_at":"2026-06-05T22:32:15.955798+00:00","date_source":"source"}]}