[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"branding":3,"analytics":7,"article-adversabench-finds-a-failure-in-every-llm-it-tests":10,"sections":35},{"siteName":4,"siteTagline":5,"publisherName":4,"contactEmail":6},"The Revision","Tech news, decoded.","editor@therevision.news",{"gaMeasurementId":8,"adsenseClientId":9},"G-ZW2MV82GYR","ca-pub-8533917693782264",{"article":11},{"id":12,"slug":13,"title":14,"dek":15,"body_md":16,"tags_json":17,"published_at":18,"created_at":19,"updated_at":20,"status":21,"review_note":22,"review_notes":23,"image_url":24,"persona_id":22,"persona_name":22,"section":25,"tags":26,"sources":30,"feedback":34,"feedback_at":22,"cost_usd":34,"total_tokens":34},2055,"adversabench-finds-a-failure-in-every-llm-it-tests","AdversaBench Finds a Failure in Every LLM It Tests","A new automated red-teaming pipeline cracked every prompt it tried, then found the exploits worked across different model sizes.","A research tool called AdversaBench broke every large language model it was aimed at — and the attacks carried over to models it never trained against.\n\nAdversaBench is an end-to-end red-teaming pipeline that takes seed prompts, mutates them with five structured operators, and then confirms genuine failures using a three-judge panel with a meta-judge tiebreaker. Across 45 seed prompts spanning reasoning, instruction-following, and tool use, every single seed produced a confirmed failure. The pipeline is open-source, with code and datasets published on GitHub.\n\nThe transfer result is the finding worth watching. Adversarial prompts generated against Llama 3.1 8B moved to Llama 3.3 70B with zero additional tuning — a gap of roughly 60 billion parameters. That suggests the mutations are exploiting general behavioral patterns baked into the training process, not quirks of a specific model. If that holds at scale, red-teaming one model in a family may be enough to surface weaknesses across the rest.\n\nThe paper also flags a measurement problem that should make anyone nervous about published safety benchmarks: pairwise judge agreement ran 80-87%, but Cohen's kappa was near zero because of label skew, meaning raw agreement numbers flatter the reliability of automated evaluation. The harder category — instruction-following — took an average of 2.4 attacker iterations to crack, versus 1.1 for reasoning and tool use, a gap that flat binary pass-fail rates would never surface. Safety claims built on those rates deserve a second look.","[\"ai\",\"security\",\"llm\",\"red-teaming\"]","2026-06-24T04:00:00.000Z","2026-06-24T05:14:14.108Z","2026-06-24T05:14:23.930Z","published",null,[],"https:\u002F\u002Fcdn.xyz.onl\u002Farticle-images\u002Fadversabench-finds-a-failure-in-every-llm-it-tests.webp","ai",[25,27,28,29],"security","llm","red-teaming",[31],{"name":32,"url":33},"arXiv cs.AI","https:\u002F\u002Farxiv.org\u002Fabs\u002F2606.24589",0,{"sections":36},[37,40,45,48,53,58,63,68,73,78,83,88,93,98],{"name":38,"slug":25,"count":39,"latest_published_at":18},"AI",528,{"name":41,"slug":42,"count":43,"latest_published_at":44},"Deals","deals",155,"2026-06-24T09:00:00.000Z",{"name":46,"slug":27,"count":47,"latest_published_at":18},"Security",144,{"name":49,"slug":50,"count":51,"latest_published_at":52},"Policy","policy",102,"2026-06-24T07:03:03.000Z",{"name":54,"slug":55,"count":56,"latest_published_at":57},"Consumer Tech","consumer-tech",84,"2026-06-23T21:34:53.000Z",{"name":59,"slug":60,"count":61,"latest_published_at":62},"Hardware","hardware",71,"2026-06-23T16:50:03.000Z",{"name":64,"slug":65,"count":66,"latest_published_at":67},"Software","software",63,"2026-06-23T11:16:34.000Z",{"name":69,"slug":70,"count":71,"latest_published_at":72},"Dev Tools","dev-tools",53,"2026-06-23T18:13:40.000Z",{"name":74,"slug":75,"count":76,"latest_published_at":77},"Science","science",39,"2026-06-23T05:25:16.000Z",{"name":79,"slug":80,"count":81,"latest_published_at":82},"Gaming","gaming",32,"2026-06-22T17:00:00.000Z",{"name":84,"slug":85,"count":86,"latest_published_at":87},"General","general",27,"2026-06-24T08:50:14.000Z",{"name":89,"slug":90,"count":91,"latest_published_at":92},"Startups","startups",24,"2026-06-23T17:25:54.000Z",{"name":94,"slug":95,"count":96,"latest_published_at":97},"Reviews","reviews",19,"2026-06-14T08:00:00.000Z",{"name":99,"slug":100,"count":101,"latest_published_at":102},"How-To","how-to",6,"2026-06-16T09:00:00.000Z"]