
% Journal article record. Names in the author field are joined with "and" (the
% BibTeX name separator); the acronym in the title is brace-protected against
% style recasing; percent signs in the abstract are escaped for TeX output.
@article{cmc.2025.069910,
  author   = {Wei, Bing and Zhong, Ming and Chen, Qian and Wu, Yi and Li, Yubin},
  title    = {Cognitive Erasure-Coded Data Update and Repair for Mitigating {I/O} Overhead},
  journal  = {Computers, Materials \& Continua},
  volume   = {86},
  number   = {2},
  pages    = {1--20},
  year     = {2026},
  issn     = {1546-2226},
  doi      = {10.32604/cmc.2025.069910},
  url      = {http://www.techscience.com/cmc/v86n2/64737},
  abstract = {In erasure-coded storage systems, updating data requires parity maintenance, which often leads to significant I/O amplification due to ``write-after-read'' operations. Furthermore, scattered parity placement increases disk seek overhead during repair, resulting in degraded system performance. To address these challenges, this paper proposes a Cognitive Update and Repair Method (CURM) that leverages machine learning to classify files into write-only, read-only, and read-write categories, enabling tailored update and repair strategies. For write-only and read-write files, CURM employs a data-difference mechanism combined with fine-grained I/O scheduling to minimize redundant read operations and mitigate I/O amplification. For read-write files, CURM further reserves adjacent disk space near parity blocks, supporting parallel reads and reducing disk seek overhead during repair. We implement CURM in a prototype system, Cognitive Update and Repair File System (CURFS), and conduct extensive experiments using real-world Network File System (NFS) and Microsoft Research (MSR) workloads on a 25-node cluster. Experimental results demonstrate that CURM improves data update throughput by up to 82.52\%, reduces recovery time by up to 47.47\%, and decreases long-term storage overhead by more than 15\% compared to state-of-the-art methods including Full Logging (FL), Parity Logging (PL), Parity Logging with Reserved space (PLR), and PARIX. These results validate the effectiveness of CURM in enhancing both update and repair performance, providing a scalable and efficient solution for large-scale erasure-coded storage systems.},
}



