@InProceedings{pmlr-v242-jongeneel24a,
  title     = {A large deviations perspective on policy gradient algorithms},
  author    = {Jongeneel, Wouter and Kuhn, Daniel and Li, Mengmeng},
  booktitle = {Proceedings of the 6th Annual Learning for Dynamics \& Control Conference},
  pages     = {916--928},
  year      = {2024},
  editor    = {Abate, Alessandro and Cannon, Mark and Margellos, Kostas and Papachristodoulou, Antonis},
  volume    = {242},
  series    = {Proceedings of Machine Learning Research},
  month     = {15--17 Jul},
  publisher = {PMLR},
  pdf       = {https://proceedings.mlr.press/v242/jongeneel24a/jongeneel24a.pdf},
  url       = {https://proceedings.mlr.press/v242/jongeneel24a.html},
  abstract  = {Motivated by policy gradient methods in the context of reinforcement learning, we derive the first large deviation rate function for the iterates generated by stochastic gradient descent for possibly non-convex objectives satisfying a Polyak-{\L}ojasiewicz condition. Leveraging the contraction principle from large deviations theory, we illustrate the potential of this result by showing how convergence properties of policy gradient with a softmax parametrization and an entropy regularized objective can be naturally extended to a wide spectrum of other policy parametrizations.}
}