BibTeX format

author = {Pardo, F and Levdik, V and Kormushev, P},
pages = {5355--5362},
publisher = {Association for the Advancement of Artificial Intelligence},
title = {Scaling all-goals updates in reinforcement learning using convolutional neural networks},
url = {},
year = {2020}

RIS format (EndNote, RefMan)

AB - Being able to reach any desired location in the environment can be a valuable asset for an agent. Learning a policy to navigate between all pairs of states individually is often not feasible. An all-goals updating algorithm uses each transition to learn Q-values towards all goals simultaneously and off-policy. However the expensive numerous updates in parallel limited the approach to small tabular cases so far. To tackle this problem we propose to use convolutional network architectures to generate Q-values and updates for a large number of goals at once. We demonstrate the accuracy and generalization qualities of the proposed method on randomly generated mazes and Sokoban puzzles. In the case of on-screen goal coordinates the resulting mapping from frames to distance-maps directly informs the agent about which places are reachable and in how many steps. As an example of application we show that replacing the random actions in ε-greedy exploration by several actions towards feasible goals generates better exploratory trajectories on Montezuma’s Revenge and Super Mario All-Stars games.
AU - Pardo,F
AU - Levdik,V
AU - Kormushev,P
EP - 5362
PB - Association for the Advancement of Artificial Intelligence
PY - 2020///
SN - 2374-3468
SP - 5355
TI - Scaling all-goals updates in reinforcement learning using convolutional neural networks
UR -
UR -
ER -