[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"branding":3,"analytics":7,"article-clinical-ai-for-suicide-risk-has-a-data-problem":10,"sections":34},{"siteName":4,"siteTagline":5,"publisherName":4,"contactEmail":6},"The Revision","Tech news, decoded.","editor@therevision.news",{"gaMeasurementId":8,"adsenseClientId":9},"G-ZW2MV82GYR","ca-pub-8533917693782264",{"article":11},{"id":12,"slug":13,"title":14,"dek":15,"body_md":16,"tags_json":17,"published_at":18,"created_at":19,"updated_at":20,"status":21,"review_note":22,"review_notes":23,"image_url":22,"persona_id":22,"persona_name":22,"section":24,"tags":25,"sources":29,"feedback":33,"feedback_at":22,"cost_usd":33,"total_tokens":33},1721,"clinical-ai-for-suicide-risk-has-a-data-problem","Clinical AI for Suicide Risk Has a Data Problem","A new paper argues that EHR-based suicidality datasets encode clinician biases and labeling shortcuts that AI models then mistake for ground truth.","Electronic health records were supposed to be the reliable alternative to social media for training suicide-risk AI — turns out they carry their own blind spots.\n\nResearchers examined ScAN, a suicidality dataset built on MIMIC-III clinical notes, and found that its labels are shaped by forces that have nothing to do with clinical reality. Cohort selection follows ICD billing codes. A single annotator applies labels. Episodes are flattened to the hospital-stay level, erasing how risk evolves over time. The result is a dataset that reflects what clinicians documented — not what patients experienced.\n\nThat distinction matters because NLP models trained on these labels inherit every assumption baked into them: that suicidality is a bounded episode with a clear start and end, that intent can be reliably read from a clinical note, and that ambiguity was resolved correctly. A linguistic analysis in the paper found that identical labels cover notes with very different temporal framings, negations, and degrees of uncertainty.\n\nThe broader implication is uncomfortable for anyone deploying clinical AI at scale. If the labels are systematically biased by documentation practices, hospital governance, and who was in the room, then benchmark scores on those datasets measure how well a model learned those biases — not how well it detects suicide risk. EHR data has been treated as more trustworthy than Twitter posts; this paper argues the trust is partly unearned.","[\"ai\",\"clinical-nlp\",\"mental-health\",\"datasets\"]","2026-06-19T04:00:00.000Z","2026-06-19T10:36:40.886Z","2026-06-19T14:21:37.955Z","published",null,[],"ai",[24,26,27,28],"clinical-nlp","mental-health","datasets",[30],{"name":31,"url":32},"arXiv cs.AI","https:\u002F\u002Farxiv.org\u002Fabs\u002F2606.19637",0,{"sections":35},[36,40,44,49,54,59,64,68,72,77,82,87,92,97],{"name":37,"slug":24,"count":38,"latest_published_at":39},"AI",491,"2026-06-19T14:59:11.000Z",{"name":41,"slug":42,"count":43,"latest_published_at":18},"Security","security",132,{"name":45,"slug":46,"count":47,"latest_published_at":48},"Policy","policy",88,"2026-06-16T09:26:09.000Z",{"name":50,"slug":51,"count":52,"latest_published_at":53},"Consumer Tech","consumer-tech",78,"2026-06-16T17:58:24.000Z",{"name":55,"slug":56,"count":57,"latest_published_at":58},"Hardware","hardware",62,"2026-06-18T15:24:16.000Z",{"name":60,"slug":61,"count":62,"latest_published_at":63},"Deals","deals",58,"2026-06-19T14:43:50.000Z",{"name":65,"slug":66,"count":62,"latest_published_at":67},"Software","software","2026-06-16T20:00:00.000Z",{"name":69,"slug":70,"count":71,"latest_published_at":18},"Dev Tools","dev-tools",50,{"name":73,"slug":74,"count":75,"latest_published_at":76},"Science","science",38,"2026-06-18T04:00:00.000Z",{"name":78,"slug":79,"count":80,"latest_published_at":81},"Gaming","gaming",31,"2026-06-16T15:25:13.000Z",{"name":83,"slug":84,"count":85,"latest_published_at":86},"General","general",26,"2026-06-13T18:35:15.000Z",{"name":88,"slug":89,"count":90,"latest_published_at":91},"Startups","startups",23,"2026-06-16T15:00:00.000Z",{"name":93,"slug":94,"count":95,"latest_published_at":96},"Reviews","reviews",19,"2026-06-14T08:00:00.000Z",{"name":98,"slug":99,"count":100,"latest_published_at":101},"How-To","how-to",6,"2026-06-16T09:00:00.000Z"]