% Journal article record. Authors are separated with " and " (BibTeX name-list syntax);
% the abstract is stored as plain text with LaTeX-escaped specials, no HTML markup.
@article{cju.2026.077411,
  author   = {Ghazwani, Yahya and Alghafees, Mohammad and Alshasha, Mishari and Brayan, Fahad and Alsayyari, Abdulrahman and Alyami, Ali},
  title    = {Can {AI} and predictive models accurately predict stone-free status? A systematic review and meta-analysis},
  journal  = {Canadian Journal of Urology},
  volume   = {33},
  number   = {2},
  pages    = {291--308},
  year     = {2026},
  issn     = {1488-5581},
  doi      = {10.32604/cju.2026.077411},
  url      = {http://www.techscience.com/CJU/v33n2/67029},
  abstract = {Objectives: The emergence of artificial intelligence (AI) and predictive modeling offers prospects for clinical, anatomical, and imaging factor combination, like radiomics, to help with stone-free status (SFS) estimation and peroperative decision-making. The goal of this study was, therefore, to define the present performance range, determine sources of heterogeneity, and determine methodological practices permitting reliable implementation by varied circumstances. Methods: We searched six bibliographic databases through 19 September 2025. Studies deriving or validating AI/predictive models for SFS after ureteroscopy were eligible. Independent dual screening, duplicate data extraction, and risk-of-bias consideration using QUADAS-AI were conducted. Results: Five retrospective cohorts were included. Modeling approaches encompassed multivariable logistic regression, regularized/radiomics pipelines, gradient boosting, and ensembles. SFS definitions ranged from $<$2 mm residual (day-1 to 3 months) to ≤5 mm residual (1 month), determined by plain radiography, ultrasound, and/or CT. The pooled ratio-scale effect for stone size per 1 mm increase was 1.26 (95\% CI 0.91–1.76; τ² ≈ 0.055; Q = 18.52; I² = 94.6\%; prediction interval 0.03–49.45). Hydronephrosis (moderate–severe vs. mild/none) showed a pooled RR 2.72 (95\% CI 0.96–7.72; τ² ≈ 0.821; Q = 65.40; I² = 96.9\%; prediction interval 0.03–249.87). As continuous contrasts, stone size was larger in the non-stone-free group (SMD 1.36, 95\% CI 0.85–1.86; τ² ≈ 0.096; I² = 72.9\%; prediction interval −3.77 to 6.48), and HU was higher (SMD 0.64, 95\% CI 0.39–0.90; τ² ≈ 0; Q = 0.73; I² = 0\%; prediction interval −0.99 to 2.27). Conclusions: Across studies evaluating AI and predictive models for ureteroscopy, discrimination was generally acceptable to excellent, and performance appeared highest in models integrating radiomics with anatomic/clinical descriptors. However, the degree of between-study heterogeneity (population mix, outcome definitions, imaging protocols, thresholds, and follow-up windows) was sufficiently large that pooled quantitative estimates should be considered clinically uninterpretable.},
}