[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"branding":3,"analytics":7,"article-staror-blends-tree-search-and-on-the-fly-learning":10,"sections":34},{"siteName":4,"siteTagline":5,"publisherName":4,"contactEmail":6},"The Revision","Tech news, decoded.","editor@therevision.news",{"gaMeasurementId":8,"adsenseClientId":9},"G-ZW2MV82GYR","ca-pub-8533917693782264",{"article":11},{"id":12,"slug":13,"title":14,"dek":15,"body_md":16,"tags_json":17,"published_at":18,"created_at":19,"updated_at":20,"status":21,"review_note":22,"review_notes":23,"image_url":22,"persona_id":22,"persona_name":22,"section":24,"tags":25,"sources":29,"feedback":33,"feedback_at":22,"cost_usd":33,"total_tokens":33},1814,"staror-blends-tree-search-and-on-the-fly-learning","StarOR Blends Tree Search and On-the-Fly Learning","StarOR couples Monte Carlo tree search with on-the-fly LoRA fine-tuning to fix the error-propagation problem in automated optimization modeling.","A new research framework called StarOR adapts its AI policy mid-search to cut early modeling errors when solving complex optimization problems.\n\nAutomated optimization modeling - turning real-world logistics, scheduling, or planning problems into valid mathematical formulas - is fragile when done by AI. A single wrong symbolic choice early on can cascade through a full formulation, making the output useless. StarOR splits the process into four stages and combines Monte Carlo Tree Search with test-time reinforcement learning, updating a lightweight LoRA adapter at each decision node via a training method called GRPO. The system uses the branches it generates as comparison signals to improve itself instance by instance, without labeled training data.\n\nThe bigger bet is that \"test-time scaling\" - more compute at inference, not at training - can substitute for expensive annotated datasets. If StarOR's approach generalizes, it points toward optimization AI that adapts to unfamiliar problem types without retraining, meaningful for any organization running logistics, scheduling, or supply chain models in-house. The paper reports state-of-the-art results across five benchmarks using a 4-billion-parameter backbone, outperforming methods that rely on much larger models.\n\n\"State-of-the-art on benchmarks\" is academic research's version of a press release - the interesting question is whether those gains survive contact with a real-world problem that wasn't in the evaluation set.","[\"ai\",\"machine-learning\",\"optimization\",\"research\"]","2026-06-19T04:00:00.000Z","2026-06-19T12:30:26.670Z","2026-06-19T14:22:19.924Z","published",null,[],"ai",[24,26,27,28],"machine-learning","optimization","research",[30],{"name":31,"url":32},"arXiv cs.AI","https:\u002F\u002Farxiv.org\u002Fabs\u002F2606.15197",0,{"sections":35},[36,40,44,49,54,59,64,68,72,77,82,87,92,97],{"name":37,"slug":24,"count":38,"latest_published_at":39},"AI",491,"2026-06-19T14:59:11.000Z",{"name":41,"slug":42,"count":43,"latest_published_at":18},"Security","security",132,{"name":45,"slug":46,"count":47,"latest_published_at":48},"Policy","policy",88,"2026-06-16T09:26:09.000Z",{"name":50,"slug":51,"count":52,"latest_published_at":53},"Consumer Tech","consumer-tech",78,"2026-06-16T17:58:24.000Z",{"name":55,"slug":56,"count":57,"latest_published_at":58},"Hardware","hardware",62,"2026-06-18T15:24:16.000Z",{"name":60,"slug":61,"count":62,"latest_published_at":63},"Deals","deals",58,"2026-06-19T14:43:50.000Z",{"name":65,"slug":66,"count":62,"latest_published_at":67},"Software","software","2026-06-16T20:00:00.000Z",{"name":69,"slug":70,"count":71,"latest_published_at":18},"Dev Tools","dev-tools",50,{"name":73,"slug":74,"count":75,"latest_published_at":76},"Science","science",38,"2026-06-18T04:00:00.000Z",{"name":78,"slug":79,"count":80,"latest_published_at":81},"Gaming","gaming",31,"2026-06-16T15:25:13.000Z",{"name":83,"slug":84,"count":85,"latest_published_at":86},"General","general",26,"2026-06-13T18:35:15.000Z",{"name":88,"slug":89,"count":90,"latest_published_at":91},"Startups","startups",23,"2026-06-16T15:00:00.000Z",{"name":93,"slug":94,"count":95,"latest_published_at":96},"Reviews","reviews",19,"2026-06-14T08:00:00.000Z",{"name":98,"slug":99,"count":100,"latest_published_at":101},"How-To","how-to",6,"2026-06-16T09:00:00.000Z"]