% Journal article record exported from the publisher site and cleaned up:
% authors are separated with "and" and written as "Last, First"; accented
% letters use BibTeX special-character escapes. The exported placeholder
% page range 0--0 was dropped -- TODO: add the real pages or article number
% once confirmed against the published version.
@article{cmes.2026.077726,
  author   = {Palomino-Resendiz, Sergio Isai and Sol{\'\i}s-Cervantes, C{\'e}sar Ulises and Cantera-Cantera, Luis Alberto and Morales-Mercado, Jorge de Jes{\'u}s and Flores-Hern{\'a}ndez, Diego Alonso},
  title    = {Gradient Descent with Time-Decaying Regularization for Training Linear Neural Networks},
  journal  = {Computer Modeling in Engineering \& Sciences},
  volume   = {147},
  number   = {1},
  year     = {2026},
  issn     = {1526-1506},
  doi      = {10.32604/cmes.2026.077726},
  url      = {http://www.techscience.com/CMES/v147n1/67126},
  abstract = {Many linear-in-parameters models arising in identification and control can be expressed as single-layer artificial neural networks (ANNs) with linear activation, enabling online learning via first-order optimization. In practice, however, standard gradient descent often exhibits slow convergence, large intermediate weights, and stagnation when the regressor data are ill-conditioned or computations are performed under finite precision. This paper proposes Gradient Descent with Time-Decaying Regularization (GD-TDR), a training algorithm that augments the quadratic loss with a regularization term whose weight decays exponentially in time. The proposed schedule enforces uniform strong convexity during early iterations, effectively mitigating neural-paralysis-like behavior associated with flat directions, while asymptotically vanishing so that the unregularized least-squares solution is recovered. A convergence theorem for GD-TDR is established and a concise pseudocode implementation is provided. Numerical and embedded experiments on an online identification problem of a Chua-type chaotic oscillator demonstrate that GD-TDR converges faster and avoids stagnation compared to standard gradient descent, without introducing the steady-state bias characteristic of fixed quadratic regularization.},
}