% Journal article introducing NDESO (Neighbor Displacement-based Enhanced Synthetic Oversampling).
% Two authors, separated by the literal " and " so BibTeX parses them as separate names.
@article{cmc.2025.063465,
  author   = {Putrama, I Made and Martinek, P{\'e}ter},
  title    = {Neighbor Displacement-Based Enhanced Synthetic Oversampling for Multiclass Imbalanced Data},
  journal  = {Computers, Materials \& Continua},
  volume   = {83},
  number   = {3},
  pages    = {5699--5727},
  year     = {2025},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2025.063465},
  url      = {http://www.techscience.com/cmc/v83n3/61046},
  abstract = {Imbalanced multiclass datasets pose challenges for machine learning algorithms. They often contain minority classes that are important for accurate predictions. However, when the data is sparsely distributed and overlaps with data points from other classes, it introduces noise. As a result, existing resampling methods may fail to preserve the original data patterns, further disrupting data quality and reducing model performance. This paper introduces Neighbor Displacement-based Enhanced Synthetic Oversampling (NDESO), a hybrid method that integrates a data displacement strategy with a resampling technique to achieve data balance. It begins by computing the average distance of noisy data points to their neighbors and adjusting their positions toward the center before applying random oversampling. Extensive evaluations compare 14 alternatives on nine classifiers across synthetic and 20 real-world datasets with varying imbalance ratios. This evaluation was structured into two distinct test groups. First, the effects of k-neighbor variations and distance metrics are evaluated, followed by a comparison of resampled data distributions against alternatives, and finally, determining the most suitable oversampling technique for data balancing. Second, the overall performance of the NDESO algorithm was assessed, focusing on G-mean and statistical significance. The results demonstrate that our method is robust to a wide range of variations in these parameters and the overall performance achieves an average G-mean score of 0.90, which is among the highest. Additionally, it attains the lowest mean rank of 2.88, indicating statistically significant improvements over existing approaches. This advantage underscores its potential for effectively handling data imbalance in practical scenarios.},
}