
@article{cmc.2026.079331,
  author   = {Caporro, Giulio and Russo, Paolo},
  title    = {{DeepEchoNet}: A Lightweight Architecture for Low Resolution Monocular Depth Estimation},
  journal  = {Computers, Materials \& Continua},
  year     = {2026},
  note     = {Online first; volume, issue, and pages not yet assigned},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2026.079331},
  url      = {http://www.techscience.com/cmc/online/detail/26641},
  abstract = {Monocular depth estimation (MDE) has become a practical alternative to active range sensing in many indoor scenarios, enabled by supervised deep learning models that predict dense depth maps from a single RGB image. However, most modern MDE systems assume mid-to-high resolution inputs and non-trivial compute budgets, limiting their direct applicability in embedded and bandwidth-constrained settings. This paper studies \emph{low resolution} MDE, focusing on $96 \times 96$ inputs, where geometric cues are strongly degraded and naively downsizing high-resolution architectures often leads to unstable training and poor accuracy. We propose DeepEchoNet, a lightweight hybrid CNN-transformer model tailored to operate natively at $96 \times 96$ resolution. The design combines a MobileViT-inspired encoder with MobileNetV2-style inverted residual blocks and lightweight transformer blocks, and a guided decoder that selectively fuses multi-scale skip features through efficient recalibration modules and separable convolutions. We further adopt a training objective that is aware of low resolution, along with a joint RGB--depth augmentation pipeline that includes a strong-to-weak schedule, to improve robustness while preserving coarse geometric consistency.},
}



