{"schema_version":"onlylabs.public_signal.v1","title":"Qwen (Alibaba Cloud) Writing: GSPO: Towards Scalable Reinforcement Learning for Language Models","description":"Qwen (Alibaba Cloud) writing signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/c662ae5e-1dd9-42a6-8297-87bc670d59ef","json_url":"https://onlylabs.fyi/signals/c662ae5e-1dd9-42a6-8297-87bc670d59ef/signal.json","generated_at":"2026-06-07T21:15:49.809781+00:00","org":{"slug":"qwen","name":"Qwen (Alibaba Cloud)","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/qwen","dossier_json_url":"https://onlylabs.fyi/labs/qwen/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/c662ae5e-1dd9-42a6-8297-87bc670d59ef","signal_json":"https://onlylabs.fyi/signals/c662ae5e-1dd9-42a6-8297-87bc670d59ef/signal.json","source":"https://qwenlm.github.io/blog/gspo/","lab_dossier":"https://onlylabs.fyi/labs/qwen","lab_dossier_json":"https://onlylabs.fyi/labs/qwen/dossier.json","analysis":"https://onlylabs.fyi/analysis/qwen","analysis_json":"https://onlylabs.fyi/analysis/qwen/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/qwen/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":"https://onlylabs.fyi/topics/talking","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"},{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety","json_url":"https://onlylabs.fyi/data-radar/safety/signals.json"}]}},"answer_pack":{"answer":"Qwen (Alibaba Cloud) published GSPO: Towards Scalable Reinforcement Learning for Language Models. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Notable research post from a major lab · GSPO: Towards Scalable Reinforcement Learning for Language Models | Qwen We have a new blog! View this page at qwen.ai . This page will automatically redirect in 5.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Infrastructure, Safety and policy in the data-business radar.","signal_desk":"talking","source_context":{"source_url":"https://qwenlm.github.io/blog/gspo/","source_host":"qwenlm.github.io","occurred_at":"2025-07-27T07:00:00+00:00","first_seen_at":"2026-06-05T05:42:59.088452+00:00","date_source":"rss.item_date","context":null},"context_markers":[{"label":"Lab","value":"Qwen (Alibaba Cloud)","source":"signal"},{"label":"Signal desk","value":"talking","source":"signal"},{"label":"Source host","value":"qwenlm.github.io","source":"source"},{"label":"Notability","value":"Notable research post from a major lab","source":"signal"},{"label":"Radar lane","value":"Infrastructure","source":"radar"},{"label":"Radar lane","value":"Safety and policy","source":"radar"},{"label":"Matched term","value":"training","source":"radar"},{"label":"Matched term","value":"scaling","source":"radar"},{"label":"Matched term","value":"policy","source":"radar"},{"label":"Watch term","value":"RL environments","source":"evidence"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Infrastructure","source":"evidence"},{"label":"Watch term","value":"Safety and alignment","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://qwenlm.github.io/blog/gspo/"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-07T21:15:49.809781+00:00"},"data_business":{"matches":true,"lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure","json_url":"https://onlylabs.fyi/data-radar/infrastructure/signals.json"},{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety","json_url":"https://onlylabs.fyi/data-radar/safety/signals.json"}],"matched_terms":["training","scaling","policy"],"score":27,"reason":"Qwen (Alibaba Cloud) has a writing signal matching infrastructure, safety and policy."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/c662ae5e-1dd9-42a6-8297-87bc670d59ef/signal.json","dossier_json":"https://onlylabs.fyi/labs/qwen/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/qwen/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/qwen/evidence.json","topic_signals_json":"https://onlylabs.fyi/topics/talking/signals.json","topic_feed":"https://onlylabs.fyi/topics/talking/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","evidence_focus":["post title","source URL","captured page text","HN traction","linked model or paper references","publication date"],"extraction_questions":["Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which writing reframes a recent release, model, hiring wave, or policy stance?","Which posts mention data, evals, infrastructure, safety, or deployment workflows?"],"signal_questions":["What public theme, launch framing, or research direction does this writing signal expose?","Which themes are labs choosing to explain publicly?","Which posts are attracting outside discussion?","Which data-business lane explains this signal: Infrastructure, Safety and policy?","Do the 6 related writing signals show a repeated pattern?"],"output_fields":["org","theme","public_framing","traction","data_business_lane","evidence_url"],"data_business_relevance":"Public writing supplies the narrative layer over raw signals and helps identify which frontier-lab priorities are becoming externally legible.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/c662ae5e-1dd9-42a6-8297-87bc670d59ef/signal.json","required":true},{"label":"source","url":"https://qwenlm.github.io/blog/gspo/","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/qwen/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/qwen/evidence.json","required":true},{"label":"topic_signals_json","url":"https://onlylabs.fyi/topics/talking/signals.json","required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze Qwen (Alibaba Cloud)'s writing signal \"GSPO: Towards Scalable Reinforcement Learning for Language Models\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"Qwen (Alibaba Cloud)","predicate":"published","object":"GSPO: Towards Scalable Reinforcement Learning for Language Models","text":"Qwen (Alibaba Cloud) published GSPO: Towards Scalable Reinforcement Learning for Language Models."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"is classified as","object":"writing signal","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models is classified as writing signal."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"belongs to","object":"talking desk","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models belongs to talking desk."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"has evidence coverage","object":"1 captured evidence page","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models has evidence coverage 1 captured evidence page."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"matches data-business lanes","object":"Infrastructure, Safety and policy","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models matches data-business lanes Infrastructure, Safety and policy."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"has captured page count","object":"1","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models has captured page count 1."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"has readable page count","object":"1","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models has readable page count 1."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"has related signal count","object":"6","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models has related signal count 6."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"has analysis playbook objective","object":"Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models has analysis playbook objective Turn public writing and discussion into a readable map of research themes, product framing, policy posture, launch narratives, and market attention.."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"has source host","object":"qwenlm.github.io","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models has source host qwenlm.github.io."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"has lab","object":"Qwen (Alibaba Cloud)","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models has lab Qwen (Alibaba Cloud)."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"has signal desk","object":"talking","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models has signal desk talking."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"has source host","object":"qwenlm.github.io","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models has source host qwenlm.github.io."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"has notability","object":"Notable research post from a major lab","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models has notability Notable research post from a major lab."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"has radar lane","object":"Infrastructure","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models has radar lane Infrastructure."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"has radar lane","object":"Safety and policy","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models has radar lane Safety and policy."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"has matched term","object":"training","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models has matched term training."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"has matched term","object":"scaling","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models has matched term scaling."}]},"intelligence":{"signal_desk":"talking","answer":"Qwen (Alibaba Cloud) published GSPO: Towards Scalable Reinforcement Learning for Language Models. This talking signal gives public context for research themes, product direction, policy, or launch framing. High-signal details: Notable research post from a major lab · GSPO: Towards Scalable Reinforcement Learning for Language Models | Qwen We have a new blog! View this page at qwen.ai . This page will automatically redirect in 5.... onlylabs links this event to 1 captured evidence page and 6 related writing signals. It also maps to Infrastructure, Safety and policy in the data-business radar.","semantic_triples":[{"subject":"Qwen (Alibaba Cloud)","predicate":"published","object":"GSPO: Towards Scalable Reinforcement Learning for Language Models","text":"Qwen (Alibaba Cloud) published GSPO: Towards Scalable Reinforcement Learning for Language Models."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"is classified as","object":"writing signal","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models is classified as writing signal."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"belongs to","object":"talking desk","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models belongs to talking desk."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"has evidence coverage","object":"1 captured evidence page","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models has evidence coverage 1 captured evidence page."},{"subject":"GSPO: Towards Scalable Reinforcement Learning for Language Models","predicate":"matches data-business lanes","object":"Infrastructure, Safety and policy","text":"GSPO: Towards Scalable Reinforcement Learning for Language Models matches data-business lanes Infrastructure, Safety and policy."}]},"signal":{"id":"c662ae5e-1dd9-42a6-8297-87bc670d59ef","url":"https://onlylabs.fyi/signals/c662ae5e-1dd9-42a6-8297-87bc670d59ef","json_url":"https://onlylabs.fyi/signals/c662ae5e-1dd9-42a6-8297-87bc670d59ef/signal.json","source_url":"https://qwenlm.github.io/blog/gspo/","title":"GSPO: Towards Scalable Reinforcement Learning for Language Models","summary":"Qwen (Alibaba Cloud) published a writing signal. onlylabs watches public writing for research themes, product direction, and model-launch context.","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"qwen","name":"Qwen (Alibaba Cloud)","category":"frontier-lab"},"occurred_at":"2025-07-27T07:00:00+00:00","first_seen_at":"2026-06-05T05:42:59.088452+00:00","date_source":"rss.item_date","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://qwenlm.github.io/blog/gspo/"]},"facets":{},"traction":{"github_stars":null,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"infrastructure","label":"Infrastructure","url":"https://onlylabs.fyi/data-radar/infrastructure"},{"key":"safety","label":"Safety and policy","url":"https://onlylabs.fyi/data-radar/safety"}],"score":27,"matched_terms":["training","scaling","policy"],"reason":"Qwen (Alibaba Cloud) has a writing signal matching infrastructure, safety and policy."}},"primary_evidence_page":{"url":"https://qwenlm.github.io/blog/gspo/","final_url":"https://qwenlm.github.io/blog/gspo/","title":"GSPO: Towards Scalable Reinforcement Learning for Language Models","http_status":200,"content_type":"text/html; charset=utf-8","capture_method":"plain","fetched_at":"2026-06-07T21:15:49.809781+00:00","bytes":25141,"raw_path":"8d28766e7ba5e0cab3974fb4b46a27fc402f8d91598ca694aafc479347673335.html","content_hash":"1b68a00a5f70a887f968a17c717b51bb6139ac3856b3b1206e05b321d30a9fab","excerpt_chars":1200,"truncated":true,"excerpt":"GSPO: Towards Scalable Reinforcement Learning for Language Models | Qwen We have a new blog! View this page at qwen.ai . This page will automatically redirect in 5 seconds. If you are not redirected automatically, please click the button below. Go Now GSPO: Towards Scalable Reinforcement Learning for Language Models July 27, 2025 · 5 min · 916 words · Qwen Team | Translations: 简体中文 PAPER DISCORD Introduction # Reinforcement Learning (RL) has emerged as a pivotal paradigm for scaling language models and enhancing their deep reasoning and problem-solving capabilities. To scale RL, the foremost prerequisite is maintaining stable and robust training dynamics. However, we observe that existing RL algorithms (such as GRPO) exhibit severe instability issues during long training and lead to irreversible model collapse, hindering further performance improvements with increased compute. To enable successful RL scaling, we propose the Group Sequence Policy Optimization (GSPO) algorithm. Unlike previous RL algorithms, GSPO defines the importance ratio based on sequence likelihood and performs sequence-level clipping, rewarding, and optimization . Compared to GRPO, GSPO demonstrates remarkable..."},"evidence_pages":[{"url":"https://qwenlm.github.io/blog/gspo/","final_url":"https://qwenlm.github.io/blog/gspo/","title":"GSPO: Towards Scalable Reinforcement Learning for Language Models","http_status":200,"content_type":"text/html; charset=utf-8","capture_method":"plain","fetched_at":"2026-06-07T21:15:49.809781+00:00","bytes":25141,"raw_path":"8d28766e7ba5e0cab3974fb4b46a27fc402f8d91598ca694aafc479347673335.html","content_hash":"1b68a00a5f70a887f968a17c717b51bb6139ac3856b3b1206e05b321d30a9fab","excerpt_chars":1200,"truncated":true,"excerpt":"GSPO: Towards Scalable Reinforcement Learning for Language Models | Qwen We have a new blog! View this page at qwen.ai . This page will automatically redirect in 5 seconds. If you are not redirected automatically, please click the button below. Go Now GSPO: Towards Scalable Reinforcement Learning for Language Models July 27, 2025 · 5 min · 916 words · Qwen Team | Translations: 简体中文 PAPER DISCORD Introduction # Reinforcement Learning (RL) has emerged as a pivotal paradigm for scaling language models and enhancing their deep reasoning and problem-solving capabilities. To scale RL, the foremost prerequisite is maintaining stable and robust training dynamics. However, we observe that existing RL algorithms (such as GRPO) exhibit severe instability issues during long training and lead to irreversible model collapse, hindering further performance improvements with increased compute. To enable successful RL scaling, we propose the Group Sequence Policy Optimization (GSPO) algorithm. Unlike previous RL algorithms, GSPO defines the importance ratio based on sequence likelihood and performs sequence-level clipping, rewarding, and optimization . Compared to GRPO, GSPO demonstrates remarkable..."}],"related_signals":[{"id":"54029233-5b0e-4748-aae1-6013ae3553d0","url":"https://onlylabs.fyi/signals/54029233-5b0e-4748-aae1-6013ae3553d0","source_url":"https://qwenlm.github.io/blog/qwen3guard/","title":"Qwen3Guard: Real-time Safety for Your Token Stream","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"qwen","name":"Qwen (Alibaba Cloud)","category":"frontier-lab"},"occurred_at":"2025-09-22T20:00:00+00:00","first_seen_at":"2026-06-05T05:42:59.088452+00:00","date_source":"rss.item_date"},{"id":"4e64c872-3d4f-4d01-8908-35b65736eb6e","url":"https://onlylabs.fyi/signals/4e64c872-3d4f-4d01-8908-35b65736eb6e","source_url":"https://qwenlm.github.io/blog/qwen-image-edit/","title":"Qwen-Image-Edit: Image Editing with Higher Quality and Efficiency","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"qwen","name":"Qwen (Alibaba Cloud)","category":"frontier-lab"},"occurred_at":"2025-08-18T17:30:00+00:00","first_seen_at":"2026-06-05T05:42:59.088452+00:00","date_source":"rss.item_date"},{"id":"27bd1b4a-28a4-423c-abb0-e5d24251de65","url":"https://onlylabs.fyi/signals/27bd1b4a-28a4-423c-abb0-e5d24251de65","source_url":"https://qwenlm.github.io/blog/qwen-image/","title":"Qwen-Image: Crafting with Native Text Rendering","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"qwen","name":"Qwen (Alibaba Cloud)","category":"frontier-lab"},"occurred_at":"2025-08-04T14:08:30+00:00","first_seen_at":"2026-06-05T05:42:59.088452+00:00","date_source":"rss.item_date"},{"id":"22c0d58c-5ce1-4d1f-97df-de5ebd764517","url":"https://onlylabs.fyi/signals/22c0d58c-5ce1-4d1f-97df-de5ebd764517","source_url":"https://qwenlm.github.io/blog/qwen-mt/","title":"Qwen-MT: Where Speed Meets Smart Translation","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"qwen","name":"Qwen (Alibaba Cloud)","category":"frontier-lab"},"occurred_at":"2025-07-24T14:00:00+00:00","first_seen_at":"2026-06-05T05:42:59.088452+00:00","date_source":"rss.item_date"},{"id":"b6e0bdb2-ffd3-4aed-b44b-732a5e0424a5","url":"https://onlylabs.fyi/signals/b6e0bdb2-ffd3-4aed-b44b-732a5e0424a5","source_url":"https://qwenlm.github.io/blog/qwen3-coder/","title":"Qwen3-Coder: Agentic Coding in the World","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"qwen","name":"Qwen (Alibaba Cloud)","category":"frontier-lab"},"occurred_at":"2025-07-22T13:00:00+00:00","first_seen_at":"2026-06-05T05:42:59.088452+00:00","date_source":"rss.item_date"},{"id":"35f4be74-c0bf-4885-a33e-c5c16b063bfa","url":"https://onlylabs.fyi/signals/35f4be74-c0bf-4885-a33e-c5c16b063bfa","source_url":"https://qwenlm.github.io/blog/qwen-tts/","title":"Time to Speak Some Dialects, Qwen-TTS!","context":null,"kind":{"key":"post_published","label":"Writing"},"org":{"slug":"qwen","name":"Qwen (Alibaba Cloud)","category":"frontier-lab"},"occurred_at":"2025-06-27T07:01:30+00:00","first_seen_at":"2026-06-05T05:42:59.088452+00:00","date_source":"rss.item_date"}]}