[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"branding":3,"analytics":7,"article-physicists-can-engineer-ai-scaling-laws-with-synthetic-data":10,"sections":34},{"siteName":4,"siteTagline":5,"publisherName":4,"contactEmail":6},"The Revision","Tech news, decoded.","editor@therevision.news",{"gaMeasurementId":8,"adsenseClientId":9},"G-ZW2MV82GYR","ca-pub-8533917693782264",{"article":11},{"id":12,"slug":13,"title":14,"dek":15,"body_md":16,"tags_json":17,"published_at":18,"created_at":19,"updated_at":20,"status":21,"review_note":22,"review_notes":23,"image_url":22,"persona_id":22,"persona_name":22,"section":24,"tags":25,"sources":29,"feedback":33,"feedback_at":22,"cost_usd":33,"total_tokens":33},1731,"physicists-can-engineer-ai-scaling-laws-with-synthetic-data","Physicists Can Engineer AI Scaling Laws With Synthetic Data","A new study shows that smarter pretraining data composition can shift AI scaling behavior toward needing more data rather than bigger models.","Particle physics researchers have found a way to steer how AI models scale — by engineering the training data itself.\n\nA paper posted to arXiv shows that for the task of classifying hadronic jets — sprays of particles produced in high-energy collisions — neural scaling laws are not fixed. Researchers demonstrated that by making pretraining data more diverse and better aligned with the downstream task, they could shift the scaling regime: models improved more from additional data than from additional parameters. The key enabler is that physics has high-fidelity simulators that produce synthetic data cheaply, unlike natural language or image domains where data collection is costly.\n\nThis matters because the dominant assumption in AI has been that scaling laws are essentially discovered, not designed. If data composition can actively shape the curve, labs working in scientific domains could sidestep expensive model size increases by investing in better synthetic data pipelines instead. That is a meaningful cost lever, especially as frontier model training runs push into nine-figure budgets.\n\nThe finding is specific to particle physics for now, and hadronic jet classification is a narrow benchmark — so anyone tempted to generalize this to language model training should pump the brakes.","[\"ai\",\"machine-learning\",\"physics\",\"research\"]","2026-06-19T04:00:00.000Z","2026-06-19T10:51:43.880Z","2026-06-19T14:21:38.220Z","published",null,[],"ai",[24,26,27,28],"machine-learning","physics","research",[30],{"name":31,"url":32},"arXiv cs.AI","https:\u002F\u002Farxiv.org\u002Fabs\u002F2606.19781",0,{"sections":35},[36,40,44,49,54,59,64,68,72,77,82,87,92,97],{"name":37,"slug":24,"count":38,"latest_published_at":39},"AI",491,"2026-06-19T14:59:11.000Z",{"name":41,"slug":42,"count":43,"latest_published_at":18},"Security","security",132,{"name":45,"slug":46,"count":47,"latest_published_at":48},"Policy","policy",88,"2026-06-16T09:26:09.000Z",{"name":50,"slug":51,"count":52,"latest_published_at":53},"Consumer Tech","consumer-tech",78,"2026-06-16T17:58:24.000Z",{"name":55,"slug":56,"count":57,"latest_published_at":58},"Hardware","hardware",62,"2026-06-18T15:24:16.000Z",{"name":60,"slug":61,"count":62,"latest_published_at":63},"Deals","deals",58,"2026-06-19T14:43:50.000Z",{"name":65,"slug":66,"count":62,"latest_published_at":67},"Software","software","2026-06-16T20:00:00.000Z",{"name":69,"slug":70,"count":71,"latest_published_at":18},"Dev Tools","dev-tools",50,{"name":73,"slug":74,"count":75,"latest_published_at":76},"Science","science",38,"2026-06-18T04:00:00.000Z",{"name":78,"slug":79,"count":80,"latest_published_at":81},"Gaming","gaming",31,"2026-06-16T15:25:13.000Z",{"name":83,"slug":84,"count":85,"latest_published_at":86},"General","general",26,"2026-06-13T18:35:15.000Z",{"name":88,"slug":89,"count":90,"latest_published_at":91},"Startups","startups",23,"2026-06-16T15:00:00.000Z",{"name":93,"slug":94,"count":95,"latest_published_at":96},"Reviews","reviews",19,"2026-06-14T08:00:00.000Z",{"name":98,"slug":99,"count":100,"latest_published_at":101},"How-To","how-to",6,"2026-06-16T09:00:00.000Z"]