@comment{cmes.2026.081129: cleaned publisher export. Authors are joined with "and" in "Last, First" form; the empty VOLUME/NUMBER fields and the unfilled PAGES template placeholder were dropped; MathML markup in ABSTRACT was replaced by LaTeX and percent signs were escaped. YEAR is taken from the year embedded in the DOI (2026) -- confirm, and add VOLUME/NUMBER/PAGES, once the article is assigned to an issue.}
@Article{cmes.2026.081129,
AUTHOR = {Almufareh, Maram Fahaad and Tehsin, Samabia},
TITLE = {Interpretable Deep Representation Learning for Pan-Cancer Diagnosis via Pathway-Constrained Transcriptomics},
JOURNAL = {Computer Modeling in Engineering \& Sciences},
YEAR = {2026},
URL = {http://www.techscience.com/CMES/online/detail/27081},
ISSN = {1526-1506},
ABSTRACT = {This article presents a Hierarchical Pathway-Masked Attention Autoencoder (H-PAAE), a biologically inspired representation-learning framework that enables explainable AI-guided cancer diagnosis. The model directly integrates the curated MSigDB Hallmark pathways, introducing pathway-constrained information flow and mechanistic interpretability through multi-level attention mechanisms. Based on TCGA RNA-seq data from 33 tumor types, H-PAAE compresses approximately 20,000 genes into a 128-dimensional latent space while preserving biologically meaningful structure. When used with XGBoost classification, H-PAAE delivers 92.37\% test accuracy and 99.38\% macro-AUROC with robust cross-validation results (92.5 $\pm$ 0.6\%). SHAP analysis identifies a small number of key latent features, corresponding to conserved oncogenic processes, and pathway enrichment analysis shows strong overlap with cancer hallmarks. H-PAAE provides a clear and interpretable biological foundation for pan-cancer classification, with well-calibrated posterior probabilities that can be used for clinical decision-making, and can be easily integrated into multimodal diagnostic workflows.},
DOI = {10.32604/cmes.2026.081129}
}