[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"branding":3,"analytics":7,"article-fast-dllm-adds-37-speed-to-diffusion-llm-inference":10},{"siteName":4,"siteTagline":5,"publisherName":4,"contactEmail":6},"The Revision","Tech news, decoded.","editor@therevision.news",{"gaMeasurementId":8,"adsenseClientId":9},"G-ZW2MV82GYR","ca-pub-8533917693782264",{"article":11},{"id":12,"slug":13,"title":14,"dek":15,"body_md":16,"tags_json":17,"published_at":18,"created_at":19,"updated_at":20,"status":21,"review_note":22,"review_notes":23,"image_url":22,"persona_id":22,"persona_name":22,"section":22,"tags":38,"sources":42,"feedback":46,"feedback_at":22,"cost_usd":46,"total_tokens":46},1409,"fast-dllm-adds-37-speed-to-diffusion-llm-inference","Fast-dLLM++ adds 37% speed to diffusion LLM inference","A training-free decoding tweak exploits confidence heterogeneity to boost diffusion LLM throughput by up to 37% without hurting accuracy.","- Fast-dLLM++ squeezes extra speed out of diffusion large language models.\n\nFast-dLLM++ replaces the original Fast-dLLM decoder with a \"Fréchet profile decoding\" rule. Instead of basing parallel token commits on the single worst‑case confidence, it scans the whole sorted confidence profile and picks a set that respects heterogeneous confidence levels. The change needs no model retraining, no alteration to the diffusion process, and no cache redesign – it drops in where Fast-dLLM already runs. Benchmarks on GSM8K, MATH, HumanEval and MBPP using the LLaDA‑8B model show up to 37% higher throughput at comparable accuracy.\n\nThe gain matters because diffusion LLMs have long been stuck on a serial bottleneck despite their parallel generation promise. By harvesting safely parallelizable tokens that were previously blocked by a conservative rule, practitioners can run larger workloads on existing hardware.\n\nIn short, the 37% speed boost narrows the gap between diffusion LLM theory and practice; we can expect the community to adopt the method quickly while researchers probe further confidence‑aware decoding tricks.","[\"diffusion-llm\",\"inference\",\"performance\"]","2026-06-16T04:00:00.000Z","2026-06-17T08:16:13.825Z","2026-06-17T08:16:16.640Z","published",null,[24,30,34],{"id":25,"reviewer":26,"round":27,"reason":28,"status":29},"editor-r1","editor",1,"Add a clear concluding paragraph that summarizes the impact and next steps, ensuring the article ends with a definitive wrap‑up.","resolved",{"id":31,"reviewer":26,"round":32,"reason":33,"status":29},"editor-r2",2,"Add a concise concluding paragraph that recaps the speed gains, their significance for diffusion LLM deployment, and notes next steps such as expected community adoption or further research.",{"id":35,"reviewer":26,"round":36,"reason":37,"status":29},"editor-r3",3,"Add a concise concluding paragraph that summarizes the 37% speed gain, its significance for diffusion LLM deployment, and next steps such as expected community adoption or further research.",[39,40,41],"diffusion-llm","inference","performance",[43],{"name":44,"url":45},"arXiv cs.AI","https:\u002F\u002Farxiv.org\u002Fabs\u002F2606.02955",0]