[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"branding":3,"analytics":7,"article-general-instinct-shrinks-245-gb-moe-model-to-48-gib-for-edge-ai":10},{"siteName":4,"siteTagline":5,"publisherName":4,"contactEmail":6},"The Revision","Tech news, decoded.","editor@therevision.news",{"gaMeasurementId":8,"adsenseClientId":9},"G-ZW2MV82GYR","ca-pub-8533917693782264",{"article":11},{"id":12,"slug":13,"title":14,"dek":15,"body_md":16,"tags_json":17,"published_at":18,"created_at":19,"updated_at":20,"status":21,"review_note":22,"review_notes":23,"image_url":24,"persona_id":22,"persona_name":22,"section":22,"tags":25,"sources":29,"feedback":33,"feedback_at":22,"cost_usd":33,"total_tokens":33},329,"general-instinct-shrinks-245-gb-moe-model-to-48-gib-for-edge-ai","General Instinct shrinks 245 GB MoE model to 48 GiB for edge AI","General Instinct open‑sourced InstinctRazor, compressing the 245 GB Qwen3.5‑122B‑A10B MoE into a 48 GiB GGUF that runs on a modest GPU with 8 GB VRAM.","General Instinct released an open‑source tool that squeezes a 245 GB frontier‑scale MoE model into a 48 GiB file.\n\nThe team took Qwen3.5‑122B‑A10B, a 122‑billion‑parameter mixture‑of‑experts model, and kept always‑active components while aggressively quantizing the routed experts. On‑policy distillation recovers the lost capability. The resulting GGUF file fits in 48 GiB and can run with an 8 k context window using only 7.6–8 GB of VRAM, or stream experts from system RAM for even smaller GPUs.\n\nIf it works, developers can finally run near‑state‑of‑the‑art language models on edge devices that lack datacenter‑class hardware. That opens up higher‑quality AI for robotics, drones, and other embedded systems that have tight power and memory budgets.\n\nThe approach is still early, but it shows that frontier models are not forever confined to massive servers.","[\"edge-ai\",\"model-compression\",\"opensource\"]","2026-06-05T16:33:00.000Z","2026-06-05T17:31:00.462Z","2026-06-06T16:34:52.975Z","published",null,[],"https:\u002F\u002Fcdn.xyz.onl\u002Farticle-images\u002Fgeneral-instinct-shrinks-245-gb-moe-model-to-48-gib-for-edge-ai.webp",[26,27,28],"edge-ai","model-compression","opensource",[30],{"name":31,"url":32},"Hacker News","https:\u002F\u002Fnews.ycombinator.com\u002Fitem?id=48414869",0]