{"schema_version":"onlylabs.public_signal.v1","title":"Microsoft Repo: microsoft/smc-clustering","description":"Microsoft repo signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/c027dff7-0910-45a9-95c2-22017cf7956d","json_url":"https://onlylabs.fyi/signals/c027dff7-0910-45a9-95c2-22017cf7956d/signal.json","generated_at":"2026-06-27T00:33:33.945Z","evidence_latest_fetched_at":"2026-06-20T07:04:48.295683+00:00","signal_first_seen_at":"2026-06-20T07:00:30.944907+00:00","org":{"slug":"microsoft","name":"Microsoft","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/microsoft","dossier_json_url":"https://onlylabs.fyi/labs/microsoft/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/c027dff7-0910-45a9-95c2-22017cf7956d","signal_json":"https://onlylabs.fyi/signals/c027dff7-0910-45a9-95c2-22017cf7956d/signal.json","source":"https://github.com/microsoft/smc-clustering","lab_dossier":"https://onlylabs.fyi/labs/microsoft","lab_dossier_json":"https://onlylabs.fyi/labs/microsoft/dossier.json","analysis":"https://onlylabs.fyi/analysis/microsoft","analysis_json":"https://onlylabs.fyi/analysis/microsoft/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/microsoft/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":null,"topic_signals_json":null,"topic_feed":null,"data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://
onlylabs.fyi/data-radar/infrastructure/signals.json"}]}},"answer_pack":{"answer":"Microsoft published microsoft/smc-clustering (Jupyter Notebook). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo microsoft/smc-clustering · language Jupyter Notebook · New repo with minimal traction. onlylabs links this event to 1 captured evidence page and 6 related repo signals. It also maps to Infrastructure in the data-business radar.","signal_desk":"repos","source_context":{"source_url":"https://github.com/microsoft/smc-clustering","source_host":"github.com","occurred_at":"2024-02-14T10:27:21+00:00","first_seen_at":"2026-06-20T07:00:30.944907+00:00","date_source":"source","context":"Jupyter Notebook"},"context_markers":[{"label":"Lab","value":"Microsoft","source":"signal"},{"label":"Signal desk","value":"repos","source":"signal"},{"label":"Source host","value":"github.com","source":"source"},{"label":"Repository","value":"microsoft/smc-clustering","source":"source"},{"label":"Language","value":"Jupyter Notebook","source":"source"},{"label":"Stars","value":"2","source":"traction"},{"label":"Notability","value":"New repo with minimal traction","source":"signal"},{"label":"Radar lane","value":"Infrastructure","source":"radar"},{"label":"Matched term","value":"cluster","source":"radar"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Data pipeline","source":"evidence"},{"label":"Watch 
term","value":"Infrastructure","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/microsoft/smc-clustering"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-20T07:04:48.295683+00:00"},"data_business":{"matches":true,"lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"}],"matched_terms":["cluster"],"score":14,"reason":"Microsoft has a repo signal matching infrastructure."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/c027dff7-0910-45a9-95c2-22017cf7956d/signal.json","dossier_json":"https://onlylabs.fyi/labs/microsoft/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/microsoft/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/microsoft/evidence.json","topic_signals_json":null,"topic_feed":null,"category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","evidence_focus":["repo name","owner","description","language","stars","source URL","first seen time","data, eval, infra, safety, and product terms"],"extraction_questions":["What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Is the repo new evidence for a lab direction that is not yet in writing or releases?","Which related signals should an analyst inspect next?"],"signal_questions":["What does this new repository reveal before a formal announcement 
exists?","What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Which data-business lane explains this signal: Infrastructure?","Do the 6 related repo signals show a repeated pattern?"],"output_fields":["org","repo","technical_theme","data_business_lane","evidence_url"],"data_business_relevance":"New repositories can expose organization build priorities early, especially around internal tooling, eval infrastructure, data systems, deployment, and agent workflows.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/c027dff7-0910-45a9-95c2-22017cf7956d/signal.json","required":true},{"label":"source","url":"https://github.com/microsoft/smc-clustering","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/microsoft/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/microsoft/evidence.json","required":true},{"label":"topic_signals_json","url":null,"required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze Microsoft's repo signal \"microsoft/smc-clustering\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"Microsoft","predicate":"published repo","object":"microsoft/smc-clustering","text":"Microsoft published repo microsoft/smc-clustering."},{"subject":"microsoft/smc-clustering","predicate":"is classified as","object":"repo signal","text":"microsoft/smc-clustering is classified as repo signal."},{"subject":"microsoft/smc-clustering","predicate":"belongs to","object":"repos desk","text":"microsoft/smc-clustering belongs to repos 
desk."},{"subject":"microsoft/smc-clustering","predicate":"has context","object":"Jupyter Notebook","text":"microsoft/smc-clustering has context Jupyter Notebook."},{"subject":"microsoft/smc-clustering","predicate":"has evidence coverage","object":"1 captured evidence page","text":"microsoft/smc-clustering has evidence coverage 1 captured evidence page."},{"subject":"microsoft/smc-clustering","predicate":"matches data-business lanes","object":"Infrastructure","text":"microsoft/smc-clustering matches data-business lanes Infrastructure."},{"subject":"microsoft/smc-clustering","predicate":"has captured page count","object":"1","text":"microsoft/smc-clustering has captured page count 1."},{"subject":"microsoft/smc-clustering","predicate":"has readable page count","object":"1","text":"microsoft/smc-clustering has readable page count 1."},{"subject":"microsoft/smc-clustering","predicate":"has related signal count","object":"6","text":"microsoft/smc-clustering has related signal count 6."},{"subject":"microsoft/smc-clustering","predicate":"has analysis playbook objective","object":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","text":"microsoft/smc-clustering has analysis playbook objective Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels."},{"subject":"microsoft/smc-clustering","predicate":"has source host","object":"github.com","text":"microsoft/smc-clustering has source host github.com."},{"subject":"microsoft/smc-clustering","predicate":"has lab","object":"Microsoft","text":"microsoft/smc-clustering has lab Microsoft."},{"subject":"microsoft/smc-clustering","predicate":"has signal desk","object":"repos","text":"microsoft/smc-clustering has signal desk repos."},{"subject":"microsoft/smc-clustering","predicate":"has source 
host","object":"github.com","text":"microsoft/smc-clustering has source host github.com."},{"subject":"microsoft/smc-clustering","predicate":"has repository","object":"microsoft/smc-clustering","text":"microsoft/smc-clustering has repository microsoft/smc-clustering."},{"subject":"microsoft/smc-clustering","predicate":"has language","object":"Jupyter Notebook","text":"microsoft/smc-clustering has language Jupyter Notebook."},{"subject":"microsoft/smc-clustering","predicate":"has stars","object":"2","text":"microsoft/smc-clustering has stars 2."},{"subject":"microsoft/smc-clustering","predicate":"has notability","object":"New repo with minimal traction","text":"microsoft/smc-clustering has notability New repo with minimal traction."},{"subject":"microsoft/smc-clustering","predicate":"has radar lane","object":"Infrastructure","text":"microsoft/smc-clustering has radar lane Infrastructure."}]},"intelligence":{"signal_desk":"repos","answer":"Microsoft published microsoft/smc-clustering (Jupyter Notebook). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo microsoft/smc-clustering · language Jupyter Notebook · New repo with minimal traction. onlylabs links this event to 1 captured evidence page and 6 related repo signals. 
It also maps to Infrastructure in the data-business radar.","semantic_triples":[{"subject":"Microsoft","predicate":"published repo","object":"microsoft/smc-clustering","text":"Microsoft published repo microsoft/smc-clustering."},{"subject":"microsoft/smc-clustering","predicate":"is classified as","object":"repo signal","text":"microsoft/smc-clustering is classified as repo signal."},{"subject":"microsoft/smc-clustering","predicate":"belongs to","object":"repos desk","text":"microsoft/smc-clustering belongs to repos desk."},{"subject":"microsoft/smc-clustering","predicate":"has context","object":"Jupyter Notebook","text":"microsoft/smc-clustering has context Jupyter Notebook."},{"subject":"microsoft/smc-clustering","predicate":"has evidence coverage","object":"1 captured evidence page","text":"microsoft/smc-clustering has evidence coverage 1 captured evidence page."},{"subject":"microsoft/smc-clustering","predicate":"matches data-business lanes","object":"Infrastructure","text":"microsoft/smc-clustering matches data-business lanes Infrastructure."}]},"signal":{"id":"c027dff7-0910-45a9-95c2-22017cf7956d","url":"https://onlylabs.fyi/signals/c027dff7-0910-45a9-95c2-22017cf7956d","json_url":"https://onlylabs.fyi/signals/c027dff7-0910-45a9-95c2-22017cf7956d/signal.json","source_url":"https://github.com/microsoft/smc-clustering","title":"microsoft/smc-clustering","summary":"Microsoft published a new repository. 
onlylabs watches repos for tooling, eval, infra, and model-adjacent work.","context":"Jupyter Notebook","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"microsoft","name":"Microsoft","category":"frontier-lab"},"occurred_at":"2024-02-14T10:27:21+00:00","first_seen_at":"2026-06-20T07:00:30.944907+00:00","date_source":"source","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/microsoft/smc-clustering"]},"facets":{"repo":"microsoft/smc-clustering","language":"Jupyter Notebook"},"traction":{"github_stars":2,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure"}],"score":14,"matched_terms":["cluster"],"reason":"Microsoft has a repo signal matching infrastructure."}},"primary_evidence_page":{"is_primary":true,"source_match":true,"url":"https://github.com/microsoft/smc-clustering","final_url":"https://github.com/microsoft/smc-clustering","title":"microsoft/smc-clustering repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-20T07:04:48.295683+00:00","bytes":9552,"raw_path":"9fdc1d70fed5cfcb591dc135fe75701da6d0de4632a5214f24879a2238784ddc.json","content_hash":"cd1728517e428b3ca1776ac6ac38c566fbb1f7b786547a7666e21341fce1f72d","excerpt_chars":1200,"truncated":true,"excerpt":"microsoft/smc-clustering Description: Scalable Model-Based Clustering with Sequential Monte Carlo Language: Jupyter Notebook License: MIT Stars: 2 Forks: 0 Open issues: 13 Created: 2024-02-14T10:27:21Z Pushed: 2026-06-20T00:51:23Z Default branch: main Fork: no Archived: no README: Scalable Model-Based Clustering with Sequential Monte Carlo This repository contains an implementation of the split SMC algorithm proposed in 
the paper _Scalable Model-Based Clustering with Sequential Monte Carlo_ as well as code for reproducing the experiments. Split SMC is an online, Bayesian clustering algorithm that decomposes large problems into approximately independent subproblems, producing a compact representation of the posterior distribution over clusterings. This allows it to scale to tasks with complex cluster-likelihoods and a large number of clusters, where existing methods struggle due to prohibitive memory or computational cost. Our approach is motivated by the knowledge base construction problem, which requires clustering large datasets of free-text fragments of information. Authors: Connie Trojan, Pavel Myshkov, Paul Fearnhead, James Hensman, Tom Minka, Christopher Nemeth This work is..."},"evidence_pages":[],"related_signals":[{"id":"6ae6413d-d27f-4b6c-8c34-260526c5ad4a","url":"https://onlylabs.fyi/signals/6ae6413d-d27f-4b6c-8c34-260526c5ad4a","source_url":"https://github.com/microsoft/Agentic-Shiksha","title":"microsoft/Agentic-Shiksha","context":null,"kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"microsoft","name":"Microsoft","category":"frontier-lab"},"occurred_at":"2026-06-26T06:44:56+00:00","first_seen_at":"2026-06-26T23:50:36.940197+00:00","date_source":"source"},{"id":"fa96889d-12d2-430e-9871-9405d53d73bd","url":"https://onlylabs.fyi/signals/fa96889d-12d2-430e-9871-9405d53d73bd","source_url":"https://github.com/microsoft/XBOX-Game-Development-Documentation","title":"microsoft/XBOX-Game-Development-Documentation","context":null,"kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"microsoft","name":"Microsoft","category":"frontier-lab"},"occurred_at":"2026-06-26T01:33:33+00:00","first_seen_at":"2026-06-26T23:50:36.940197+00:00","date_source":"source"},{"id":"234643d2-74f7-4307-8794-f6e80fec2878","url":"https://onlylabs.fyi/signals/234643d2-74f7-4307-8794-f6e80fec2878","source_url":"https://github.com/microsoft/LAB513-Build-an-AI-Powered-FAQ-Assistant-with-Azure-SQL","t
itle":"microsoft/LAB513-Build-an-AI-Powered-FAQ-Assistant-with-Azure-SQL","context":null,"kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"microsoft","name":"Microsoft","category":"frontier-lab"},"occurred_at":"2026-06-25T19:50:36+00:00","first_seen_at":"2026-06-26T23:50:36.940197+00:00","date_source":"source"},{"id":"ac8e1b0a-9de4-4215-b6e6-d443de8b7710","url":"https://onlylabs.fyi/signals/ac8e1b0a-9de4-4215-b6e6-d443de8b7710","source_url":"https://github.com/microsoft/TechLab-Module-19-Defender-for-Storage","title":"microsoft/TechLab-Module-19-Defender-for-Storage","context":null,"kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"microsoft","name":"Microsoft","category":"frontier-lab"},"occurred_at":"2026-06-25T19:41:12+00:00","first_seen_at":"2026-06-26T23:50:36.940197+00:00","date_source":"source"},{"id":"74d1798f-3e3a-4dbb-b4a3-5492da857f7f","url":"https://onlylabs.fyi/signals/74d1798f-3e3a-4dbb-b4a3-5492da857f7f","source_url":"https://github.com/microsoft/Custom-Copilot-Connector-using-Connector-SDK","title":"microsoft/Custom-Copilot-Connector-using-Connector-SDK","context":"C#","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"microsoft","name":"Microsoft","category":"frontier-lab"},"occurred_at":"2026-06-25T11:45:08+00:00","first_seen_at":"2026-06-26T23:50:36.940197+00:00","date_source":"source"},{"id":"2b3f3fc0-60f0-4892-934e-c720429e9476","url":"https://onlylabs.fyi/signals/2b3f3fc0-60f0-4892-934e-c720429e9476","source_url":"https://github.com/microsoft/trainer-ai-labs","title":"microsoft/trainer-ai-labs","context":null,"kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"microsoft","name":"Microsoft","category":"frontier-lab"},"occurred_at":"2026-06-25T07:07:28+00:00","first_seen_at":"2026-06-26T23:50:37.018387+00:00","date_source":"source"}]}