
@article{cmes.2025.074901,
  author   = {Xu, Guangyu and Xu, Siyuan and Lu, Siyu and Liu, Yuxin and Yang, Bo and Lyu, Junmin and Zheng, Wenfeng},
  title    = {Encoder-Guided Latent Space Search Based on Generative Networks for Stereo Disparity Estimation in Surgical Imaging},
  journal  = {Computer Modeling in Engineering \& Sciences},
  year     = {2025},
  volume   = {145},
  number   = {3},
  pages    = {4037--4053},
  issn     = {1526-1506},
  doi      = {10.32604/cmes.2025.074901},
  url      = {http://www.techscience.com/CMES/v145n3/65012},
  abstract = {Robust stereo disparity estimation plays a critical role in minimally invasive surgery, where dynamic soft tissues, specular reflections, and data scarcity pose major challenges to traditional end-to-end deep learning and deformable model-based methods. In this paper, we propose a novel disparity estimation framework that leverages a pretrained StyleGAN generator to represent the disparity manifold of Minimally Invasive Surgery (MIS) scenes and reformulates the stereo matching task as a latent-space optimization problem. Specifically, given a stereo pair, we search for the optimal latent vector in the intermediate latent space of StyleGAN, such that the photometric reconstruction loss between the stereo images is minimized while regularizing the latent code to remain within the generator's high-confidence region. Unlike existing encoder-based embedding methods, our approach directly exploits the geometry of the learned latent space and enforces both photometric consistency and manifold prior during inference, without the need for additional training or supervision. Extensive experiments on stereo-endoscopic videos demonstrate that our method achieves high-fidelity and robust disparity estimation across varying lighting, occlusion, and tissue dynamics, outperforming Thin Plate Spline (TPS)-based and linear representation baselines. This work bridges generative modeling and 3D perception by enabling efficient, training-free disparity recovery from pre-trained generative models with reduced inference latency.},
}



