
% Journal article entry for the MAFT maritime anomaly-detection paper (CMC).
% NOTE(review): the export carried empty volume/number fields and a literal
% "{pages}" template placeholder; they are omitted here until real values are
% assigned. The year is inferred from the DOI / citation key (cmc.2026.*) --
% TODO confirm against the published record.
@article{cmc.2026.079708,
  author   = {Hasan, Raza and Ahmad, Shakeel and Gocer, Ismet and Bhuiyan, Zakirul},
  title    = {Late-Fusion of Heterogeneous Maritime Data Using Self-Attention for Interpretable Anomaly Detection},
  journal  = {Computers, Materials \& Continua},
  year     = {2026},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2026.079708},
  url      = {http://www.techscience.com/cmc/online/detail/26450},
  abstract = {Maritime Domain Awareness (MDA) is critical for global security and economic stability, yet it is increasingly challenged by sophisticated adversarial tactics such as signal spoofing and ``dark vessel'' activities. Traditional surveillance systems, often reliant on single-sensor modalities, are ill-equipped to handle these deceptive behaviors. To address this, we propose the Multimodal Attention-based Fusion Transformer (MAFT), a novel deep learning architecture that integrates four distinct data modalities---Aerial imagery, Synthetic Aperture Radar (SAR), acoustic signatures, and Automatic Identification System (AIS) data---to achieve robust and interpretable maritime anomaly detection. A key contribution of our work is a principled synthetic data generation pipeline that creates a large-scale, labeled dataset (16,000 samples) for four critical anomaly types: Correlated Activity, Dark Vessels, AIS Spoofing, and Kinematic Anomalies. MAFT architecture employs modality-specific encoders to project heterogeneous data into a common 320-dimensional embedding space. These embeddings are then tokenized and supplied to a multi-layer Transformer Encoder, which leverages a self-attention mechanism for late-fusion, learning complex, non-linear inter-modal relationships. We also introduce ``modality dropout'' ($p = 0.3$) as a regularization technique to enhance model robustness against sensor failure or data unavailability. Quantitative analysis shows our model achieves a 97.02\% F1-score and a significantly improved Expected Calibration Error (ECE) of 0.011, outperforming Early Fusion CNN, Mid-Fusion MLP, and Decision-Ensemble baselines. Furthermore, computational profiling confirms an inference latency of 26.54 ms, demonstrating operational readiness for real-time deployment. Analysis of the model's attention weights suggests that MAFT not only accurately classifies maritime activities but also provides a high degree of model interpretability, offering crucial, data-driven insights for maritime security operators.},
}



