{"schema_version":"onlylabs.public_signal.v1","title":"Qwen (Alibaba Cloud) Repo: QwenLM/ConsisEval","description":"Qwen (Alibaba Cloud) repo signal with public source context, captured evidence pages, related signals, and data-business radar classification.","url":"https://onlylabs.fyi/signals/e7153ae9-7b6e-4bdd-b3c0-bdd12787c902","json_url":"https://onlylabs.fyi/signals/e7153ae9-7b6e-4bdd-b3c0-bdd12787c902/signal.json","generated_at":"2026-06-11T03:59:05.329164+00:00","org":{"slug":"qwen","name":"Qwen (Alibaba Cloud)","category":"frontier-lab","category_label":"Frontier lab","dossier_url":"https://onlylabs.fyi/labs/qwen","dossier_json_url":"https://onlylabs.fyi/labs/qwen/dossier.json"},"related_urls":{"signal":"https://onlylabs.fyi/signals/e7153ae9-7b6e-4bdd-b3c0-bdd12787c902","signal_json":"https://onlylabs.fyi/signals/e7153ae9-7b6e-4bdd-b3c0-bdd12787c902/signal.json","source":"https://github.com/QwenLM/ConsisEval","lab_dossier":"https://onlylabs.fyi/labs/qwen","lab_dossier_json":"https://onlylabs.fyi/labs/qwen/dossier.json","analysis":"https://onlylabs.fyi/analysis/qwen","analysis_json":"https://onlylabs.fyi/analysis/qwen/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/qwen/evidence.json","category":"https://onlylabs.fyi/frontier","category_json":"https://onlylabs.fyi/frontier.json","category_feed":"https://onlylabs.fyi/frontier/feed.xml","category_signals_json":"https://onlylabs.fyi/signals.json","topic":null,"topic_signals_json":null,"topic_feed":null,"data_business":{"radar":"https://onlylabs.fyi/data-radar","radar_json":"https://onlylabs.fyi/data-radar.json","opportunities":"https://onlylabs.fyi/opportunities","opportunities_json":"https://onlylabs.fyi/opportunities.json","lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}]}},"answer_pack":{"answer":"Qwen (Alibaba Cloud) published QwenLM/ConsisEval (Python). 
This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo QwenLM/ConsisEval · language Python. onlylabs links this event to 1 captured evidence page and 6 related repo signals. It also maps to Evals and quality in the data-business radar.","signal_desk":"repos","source_context":{"source_url":"https://github.com/QwenLM/ConsisEval","source_host":"github.com","occurred_at":"2024-06-17T07:46:23+00:00","first_seen_at":"2026-06-05T05:42:59.154483+00:00","date_source":"source","context":"Python"},"context_markers":[{"label":"Lab","value":"Qwen (Alibaba Cloud)","source":"signal"},{"label":"Signal desk","value":"repos","source":"signal"},{"label":"Source host","value":"github.com","source":"source"},{"label":"Repository","value":"QwenLM/ConsisEval","source":"source"},{"label":"Language","value":"Python","source":"source"},{"label":"Stars","value":"15","source":"traction"},{"label":"Radar lane","value":"Evals and quality","source":"radar"},{"label":"Matched term","value":"eval","source":"radar"},{"label":"Watch term","value":"Eval methodology","source":"evidence"},{"label":"Watch term","value":"Data pipeline","source":"evidence"}],"evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/QwenLM/ConsisEval"],"related_signals":6,"has_source_url":true,"latest_page_fetched_at":"2026-06-11T03:59:05.329164+00:00"},"data_business":{"matches":true,"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals","json_url":"https://onlylabs.fyi/data-radar/evals/signals.json"}],"matched_terms":["eval"],"score":14,"reason":"Qwen (Alibaba Cloud) has a repo signal matching evals and 
quality."},"agent_handoff":{"signal_json":"https://onlylabs.fyi/signals/e7153ae9-7b6e-4bdd-b3c0-bdd12787c902/signal.json","dossier_json":"https://onlylabs.fyi/labs/qwen/dossier.json","analysis_json":"https://onlylabs.fyi/analysis/qwen/analysis.json","analysis_evidence_json":"https://onlylabs.fyi/analysis/qwen/evidence.json","topic_signals_json":null,"topic_feed":null,"category_signals_json":"https://onlylabs.fyi/signals.json","data_radar_json":"https://onlylabs.fyi/data-radar.json","opportunities_json":"https://onlylabs.fyi/opportunities.json"},"analysis_playbook":{"objective":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","evidence_focus":["repo name","owner","description","language","stars","source URL","first seen time","data, eval, infra, safety, and product terms"],"extraction_questions":["What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Is the repo new evidence for a lab direction that is not yet in writing or releases?","Which related signals should an analyst inspect next?"],"signal_questions":["What does this new repository reveal before a formal announcement exists?","What technical area does this repository expose?","Does the repo imply eval, data, infrastructure, agent, or deployment work?","Which data-business lane explains this signal: Evals and quality?","Do the 6 related repo signals show a repeated pattern?"],"output_fields":["org","repo","technical_theme","data_business_lane","evidence_url"],"data_business_relevance":"New repositories can expose organization build priorities early, especially around internal tooling, eval infrastructure, data systems, deployment, and agent 
workflows.","required_sources":[{"label":"signal_json","url":"https://onlylabs.fyi/signals/e7153ae9-7b6e-4bdd-b3c0-bdd12787c902/signal.json","required":true},{"label":"source","url":"https://github.com/QwenLM/ConsisEval","required":true},{"label":"dossier_json","url":"https://onlylabs.fyi/labs/qwen/dossier.json","required":true},{"label":"analysis_evidence_json","url":"https://onlylabs.fyi/analysis/qwen/evidence.json","required":true},{"label":"topic_signals_json","url":null,"required":false},{"label":"data_radar_json","url":"https://onlylabs.fyi/data-radar.json","required":true}],"expected_output":["one-paragraph source-grounded interpretation","data-business implication","confidence and missing evidence","recommended next source to inspect"],"prompt_seed":"Using only the linked onlylabs JSON, captured source context, and cited evidence, analyze Qwen (Alibaba Cloud)'s repo signal \"QwenLM/ConsisEval\" for frontier lab strategy and data-business implications."},"semantic_triples":[{"subject":"Qwen (Alibaba Cloud)","predicate":"published repo","object":"QwenLM/ConsisEval","text":"Qwen (Alibaba Cloud) published repo QwenLM/ConsisEval."},{"subject":"QwenLM/ConsisEval","predicate":"is classified as","object":"repo signal","text":"QwenLM/ConsisEval is classified as repo signal."},{"subject":"QwenLM/ConsisEval","predicate":"belongs to","object":"repos desk","text":"QwenLM/ConsisEval belongs to repos desk."},{"subject":"QwenLM/ConsisEval","predicate":"has context","object":"Python","text":"QwenLM/ConsisEval has context Python."},{"subject":"QwenLM/ConsisEval","predicate":"has evidence coverage","object":"1 captured evidence page","text":"QwenLM/ConsisEval has evidence coverage 1 captured evidence page."},{"subject":"QwenLM/ConsisEval","predicate":"matches data-business lanes","object":"Evals and quality","text":"QwenLM/ConsisEval matches data-business lanes Evals and quality."},{"subject":"QwenLM/ConsisEval","predicate":"has captured page 
count","object":"1","text":"QwenLM/ConsisEval has captured page count 1."},{"subject":"QwenLM/ConsisEval","predicate":"has readable page count","object":"1","text":"QwenLM/ConsisEval has readable page count 1."},{"subject":"QwenLM/ConsisEval","predicate":"has related signal count","object":"6","text":"QwenLM/ConsisEval has related signal count 6."},{"subject":"QwenLM/ConsisEval","predicate":"has analysis playbook objective","object":"Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels.","text":"QwenLM/ConsisEval has analysis playbook objective Turn new repository signals into early evidence of tooling, eval, infrastructure, model-adjacent, or product work before it appears in polished launch channels."},{"subject":"QwenLM/ConsisEval","predicate":"has source host","object":"github.com","text":"QwenLM/ConsisEval has source host github.com."},{"subject":"QwenLM/ConsisEval","predicate":"has lab","object":"Qwen (Alibaba Cloud)","text":"QwenLM/ConsisEval has lab Qwen (Alibaba Cloud)."},{"subject":"QwenLM/ConsisEval","predicate":"has signal desk","object":"repos","text":"QwenLM/ConsisEval has signal desk repos."},{"subject":"QwenLM/ConsisEval","predicate":"has source host","object":"github.com","text":"QwenLM/ConsisEval has source host github.com."},{"subject":"QwenLM/ConsisEval","predicate":"has repository","object":"QwenLM/ConsisEval","text":"QwenLM/ConsisEval has repository QwenLM/ConsisEval."},{"subject":"QwenLM/ConsisEval","predicate":"has language","object":"Python","text":"QwenLM/ConsisEval has language Python."},{"subject":"QwenLM/ConsisEval","predicate":"has stars","object":"15","text":"QwenLM/ConsisEval has stars 15."},{"subject":"QwenLM/ConsisEval","predicate":"has radar lane","object":"Evals and quality","text":"QwenLM/ConsisEval has radar lane Evals and quality."},{"subject":"QwenLM/ConsisEval","predicate":"has matched 
term","object":"eval","text":"QwenLM/ConsisEval has matched term eval."}]},"intelligence":{"signal_desk":"repos","answer":"Qwen (Alibaba Cloud) published QwenLM/ConsisEval (Python). This repository signal exposes tooling, eval, infrastructure, or model-adjacent work before it may appear in a launch post. High-signal details: repo QwenLM/ConsisEval · language Python. onlylabs links this event to 1 captured evidence page and 6 related repo signals. It also maps to Evals and quality in the data-business radar.","semantic_triples":[{"subject":"Qwen (Alibaba Cloud)","predicate":"published repo","object":"QwenLM/ConsisEval","text":"Qwen (Alibaba Cloud) published repo QwenLM/ConsisEval."},{"subject":"QwenLM/ConsisEval","predicate":"is classified as","object":"repo signal","text":"QwenLM/ConsisEval is classified as repo signal."},{"subject":"QwenLM/ConsisEval","predicate":"belongs to","object":"repos desk","text":"QwenLM/ConsisEval belongs to repos desk."},{"subject":"QwenLM/ConsisEval","predicate":"has context","object":"Python","text":"QwenLM/ConsisEval has context Python."},{"subject":"QwenLM/ConsisEval","predicate":"has evidence coverage","object":"1 captured evidence page","text":"QwenLM/ConsisEval has evidence coverage 1 captured evidence page."},{"subject":"QwenLM/ConsisEval","predicate":"matches data-business lanes","object":"Evals and quality","text":"QwenLM/ConsisEval matches data-business lanes Evals and quality."}]},"signal":{"id":"e7153ae9-7b6e-4bdd-b3c0-bdd12787c902","url":"https://onlylabs.fyi/signals/e7153ae9-7b6e-4bdd-b3c0-bdd12787c902","json_url":"https://onlylabs.fyi/signals/e7153ae9-7b6e-4bdd-b3c0-bdd12787c902/signal.json","source_url":"https://github.com/QwenLM/ConsisEval","title":"QwenLM/ConsisEval","summary":"Qwen (Alibaba Cloud) published a new repository. 
onlylabs watches repos for tooling, eval, infra, and model-adjacent work.","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"qwen","name":"Qwen (Alibaba Cloud)","category":"frontier-lab"},"occurred_at":"2024-06-17T07:46:23+00:00","first_seen_at":"2026-06-05T05:42:59.154483+00:00","date_source":"source","evidence_coverage":{"target_pages":1,"captured_pages":1,"readable_pages":1,"capture_methods":["plain"],"missing_page_urls":[],"failed_page_urls":[],"blocked_page_urls":[],"page_urls":["https://github.com/QwenLM/ConsisEval"]},"facets":{"repo":"QwenLM/ConsisEval","language":"Python"},"traction":{"github_stars":15,"hn_points":null,"hn_comments":null,"hn_story_id":null,"hf_downloads":null,"hf_likes":null},"data_radar":{"lanes":[{"key":"evals","label":"Evals and quality","url":"https://onlylabs.fyi/data-radar/evals"}],"score":14,"matched_terms":["eval"],"reason":"Qwen (Alibaba Cloud) has a repo signal matching evals and quality."}},"primary_evidence_page":{"url":"https://github.com/QwenLM/ConsisEval","final_url":"https://github.com/QwenLM/ConsisEval","title":"QwenLM/ConsisEval repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T03:59:05.329164+00:00","bytes":10845,"raw_path":"cbdf85b8c34ed9fc7e2bb224c240fd94cb0e52efa7da2bb0a161b374606e8e49.json","content_hash":"b25815f5386da140113739ad807af20bae253bd4a010e00a4c273f89d0dea5e0","excerpt_chars":1200,"truncated":true,"excerpt":"QwenLM/ConsisEval Language: Python License: MIT Stars: 15 Forks: 9 Open issues: 0 Created: 2024-06-17T07:46:23Z Pushed: 2024-07-05T07:54:29Z Default branch: preview Fork: no Archived: no README: <div align=\"center\"> <h2><i>ConsisEval:</i> A Hard-to-Easy Consistency Evaluation<br>Benchmark for Large Language Models</h2> </div> <!-- <p align=\"center\"> | <b>Paper</b> | <b>Leaderboard</b> | </p> --> <!-- This is the repo for our paper: Can Large Language Models Always Solve Easy Problems if They Can Solve 
Harder Ones? --> - This repo is for paper [Can Large Language Models Always Solve Easy Problems if They Can Solve Harder Ones?](https://arxiv.org/abs/2406.12809) Overview ConsisEval is developed to systematically evaluate the hard-to-easy consistency of LLMs. Here the hard-to-easy inconsistency refers to the counter-intuitive phenomenons where LLMs, while capable of solving hard problems, can paradoxically fail at easier ones. ConsisEval includes 732 pair of questions from code (164), mathematics (298), and instruction-following (270) domains. It is noteworthy that there are only pairwise data in ConsisEval: one datum is comprised of two questions (an easy question and a harder one),..."},"evidence_pages":[{"url":"https://github.com/QwenLM/ConsisEval","final_url":"https://github.com/QwenLM/ConsisEval","title":"QwenLM/ConsisEval repository metadata","http_status":200,"content_type":"application/json","capture_method":"plain","fetched_at":"2026-06-11T03:59:05.329164+00:00","bytes":10845,"raw_path":"cbdf85b8c34ed9fc7e2bb224c240fd94cb0e52efa7da2bb0a161b374606e8e49.json","content_hash":"b25815f5386da140113739ad807af20bae253bd4a010e00a4c273f89d0dea5e0","excerpt_chars":1200,"truncated":true,"excerpt":"QwenLM/ConsisEval Language: Python License: MIT Stars: 15 Forks: 9 Open issues: 0 Created: 2024-06-17T07:46:23Z Pushed: 2024-07-05T07:54:29Z Default branch: preview Fork: no Archived: no README: <div align=\"center\"> <h2><i>ConsisEval:</i> A Hard-to-Easy Consistency Evaluation<br>Benchmark for Large Language Models</h2> </div> <!-- <p align=\"center\"> | <b>Paper</b> | <b>Leaderboard</b> | </p> --> <!-- This is the repo for our paper: Can Large Language Models Always Solve Easy Problems if They Can Solve Harder Ones? --> - This repo is for paper [Can Large Language Models Always Solve Easy Problems if They Can Solve Harder Ones?](https://arxiv.org/abs/2406.12809) Overview ConsisEval is developed to systematically evaluate the hard-to-easy consistency of LLMs. 
Here the hard-to-easy inconsistency refers to the counter-intuitive phenomenons where LLMs, while capable of solving hard problems, can paradoxically fail at easier ones. ConsisEval includes 732 pair of questions from code (164), mathematics (298), and instruction-following (270) domains. It is noteworthy that there are only pairwise data in ConsisEval: one datum is comprised of two questions (an easy question and a harder one),..."}],"related_signals":[{"id":"cf5152e4-92e2-4b30-8b9c-39fcaa9decd0","url":"https://onlylabs.fyi/signals/cf5152e4-92e2-4b30-8b9c-39fcaa9decd0","source_url":"https://github.com/QwenLM/open-computer-use","title":"QwenLM/open-computer-use","context":"Swift","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"qwen","name":"Qwen (Alibaba Cloud)","category":"frontier-lab"},"occurred_at":"2026-06-01T08:10:48+00:00","first_seen_at":"2026-06-09T07:00:19.904658+00:00","date_source":"source"},{"id":"ed913285-e7b2-4360-9597-781340861ca6","url":"https://onlylabs.fyi/signals/ed913285-e7b2-4360-9597-781340861ca6","source_url":"https://github.com/QwenLM/Qwen-VLA","title":"QwenLM/Qwen-VLA","context":null,"kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"qwen","name":"Qwen (Alibaba Cloud)","category":"frontier-lab"},"occurred_at":"2026-05-28T03:46:53+00:00","first_seen_at":"2026-06-05T05:42:59.154483+00:00","date_source":"source"},{"id":"34eb5dfd-321d-4e8f-b0da-49498c12414c","url":"https://onlylabs.fyi/signals/34eb5dfd-321d-4e8f-b0da-49498c12414c","source_url":"https://github.com/QwenLM/Qwen-Image-Bench","title":"QwenLM/Qwen-Image-Bench","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"qwen","name":"Qwen (Alibaba 
Cloud)","category":"frontier-lab"},"occurred_at":"2026-05-21T03:41:40+00:00","first_seen_at":"2026-06-05T05:42:59.154483+00:00","date_source":"source"},{"id":"95d8ede7-9114-4665-8daa-a0da982f875a","url":"https://onlylabs.fyi/signals/95d8ede7-9114-4665-8daa-a0da982f875a","source_url":"https://github.com/QwenLM/FlashQLA","title":"QwenLM/FlashQLA","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"qwen","name":"Qwen (Alibaba Cloud)","category":"frontier-lab"},"occurred_at":"2026-04-24T09:18:05+00:00","first_seen_at":"2026-06-05T05:42:59.154483+00:00","date_source":"source"},{"id":"e8c7f065-45a3-4af3-99ae-0c71de12ade9","url":"https://onlylabs.fyi/signals/e8c7f065-45a3-4af3-99ae-0c71de12ade9","source_url":"https://github.com/QwenLM/WebWorld","title":"QwenLM/WebWorld","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"qwen","name":"Qwen (Alibaba Cloud)","category":"frontier-lab"},"occurred_at":"2026-02-13T14:02:59+00:00","first_seen_at":"2026-06-05T05:42:59.154483+00:00","date_source":"source"},{"id":"d44d7358-ccf7-4e31-b8a3-253b62d26b31","url":"https://onlylabs.fyi/signals/d44d7358-ccf7-4e31-b8a3-253b62d26b31","source_url":"https://github.com/QwenLM/RationaleRM","title":"QwenLM/RationaleRM","context":"Python","kind":{"key":"repo_new","label":"Repo"},"org":{"slug":"qwen","name":"Qwen (Alibaba Cloud)","category":"frontier-lab"},"occurred_at":"2026-02-02T14:15:02+00:00","first_seen_at":"2026-06-05T05:42:59.154483+00:00","date_source":"source"}]}