[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"branding":3,"analytics":7,"article-fm-agent-finds-522-bugs-in-code-that-developers-already-tested":10,"sections":34},{"siteName":4,"siteTagline":5,"publisherName":4,"contactEmail":6},"The Revision","Tech news, decoded.","editor@therevision.news",{"gaMeasurementId":8,"adsenseClientId":9},"G-ZW2MV82GYR","ca-pub-8533917693782264",{"article":11},{"id":12,"slug":13,"title":14,"dek":15,"body_md":16,"tags_json":17,"published_at":18,"created_at":19,"updated_at":20,"status":21,"review_note":22,"review_notes":23,"image_url":22,"persona_id":22,"persona_name":22,"section":24,"tags":25,"sources":29,"feedback":33,"feedback_at":22,"cost_usd":33,"total_tokens":33},1806,"fm-agent-finds-522-bugs-in-code-that-developers-already-tested","FM-Agent Finds 522 Bugs in Code That Developers Already Tested","A new LLM-powered framework applies Hoare logic compositionally to large codebases, surfacing serious bugs in systems of up to 143,000 lines of code.","An AI-assisted verification framework found hundreds of bugs in production-grade code that human developers had already signed off on.\n\nFM-Agent, described in a new paper, uses large language models to automate a formal verification technique called Hoare logic - which breaks a program into individual functions and proves each one behaves correctly in isolation. The key problem it solves is specification writing: historically, engineers had to manually describe what every function was supposed to do before a verifier could check it. FM-Agent instead infers those specifications top-down, deriving what a function should do from how the rest of the code calls it. That means it can work on LLM-generated code where even the original developer may not fully understand each function's intended behavior. When it suspects a bug, it auto-generates test cases to confirm and explain the failure.\n\nThe results are hard to dismiss. Across systems of up to 143,000 lines of code - including compilers - FM-Agent completed its analysis within two days per system and found 522 bugs that prior testing had missed. Some of those bugs can cause crashes or silently wrong outputs, the kind of failure that testing alone routinely lets through.\n\nThe timing matters. As LLMs generate more production code at scale, the gap between \"code that runs\" and \"code that is correct\" widens. FM-Agent does not close that gap entirely - formal verification never does - but it applies pressure at exactly the point where human review is weakest: complex, auto-generated code that no single developer fully owns.","[\"formal verification\",\"ai\",\"software testing\",\"llms\"]","2026-06-19T04:00:00.000Z","2026-06-19T12:20:12.503Z","2026-06-19T14:22:19.723Z","published",null,[],"ai",[26,24,27,28],"formal verification","software testing","llms",[30],{"name":31,"url":32},"arXiv cs.AI","https:\u002F\u002Farxiv.org\u002Fabs\u002F2604.11556",0,{"sections":35},[36,40,44,49,54,59,64,68,72,77,82,87,92,97],{"name":37,"slug":24,"count":38,"latest_published_at":39},"AI",491,"2026-06-19T14:59:11.000Z",{"name":41,"slug":42,"count":43,"latest_published_at":18},"Security","security",132,{"name":45,"slug":46,"count":47,"latest_published_at":48},"Policy","policy",88,"2026-06-16T09:26:09.000Z",{"name":50,"slug":51,"count":52,"latest_published_at":53},"Consumer Tech","consumer-tech",78,"2026-06-16T17:58:24.000Z",{"name":55,"slug":56,"count":57,"latest_published_at":58},"Hardware","hardware",62,"2026-06-18T15:24:16.000Z",{"name":60,"slug":61,"count":62,"latest_published_at":63},"Deals","deals",58,"2026-06-19T14:43:50.000Z",{"name":65,"slug":66,"count":62,"latest_published_at":67},"Software","software","2026-06-16T20:00:00.000Z",{"name":69,"slug":70,"count":71,"latest_published_at":18},"Dev Tools","dev-tools",50,{"name":73,"slug":74,"count":75,"latest_published_at":76},"Science","science",38,"2026-06-18T04:00:00.000Z",{"name":78,"slug":79,"count":80,"latest_published_at":81},"Gaming","gaming",31,"2026-06-16T15:25:13.000Z",{"name":83,"slug":84,"count":85,"latest_published_at":86},"General","general",26,"2026-06-13T18:35:15.000Z",{"name":88,"slug":89,"count":90,"latest_published_at":91},"Startups","startups",23,"2026-06-16T15:00:00.000Z",{"name":93,"slug":94,"count":95,"latest_published_at":96},"Reviews","reviews",19,"2026-06-14T08:00:00.000Z",{"name":98,"slug":99,"count":100,"latest_published_at":101},"How-To","how-to",6,"2026-06-16T09:00:00.000Z"]