Citation

BibTeX format

@inproceedings{Lim:2023:10.1162/isal_a_00566,
  author        = {Lim, B. W. T. and Flageat, M. and Cully, A.},
  title         = {Efficient Exploration Using Model-Based Quality-Diversity with Gradients},
  booktitle     = {Proceedings of the Artificial Life Conference ({ALIFE})},
  pages         = {1--10},
  publisher     = {MIT Press},
  year          = {2023},
  doi           = {10.1162/isal_a_00566},
  url           = {http://hdl.handle.net/10044/1/104354},
  internal-note = {booktitle inferred from DOI prefix isal_a (ISAL proceedings, MIT Press) -- verify against the published proceedings},
}

RIS format (EndNote, RefMan)

TY  - CPAPER
AB  - Exploration is a key challenge in Reinforcement Learning, especially in long-horizon, deceptive and sparse-reward environments. For such applications, population-based approaches have proven effective. Methods such as Quality-Diversity deals with this by encouraging novel solutions and producing a diversity of behaviours. However, these methods are driven by either undirected sampling (i.e. mutations) or use approximated gradients (i.e. Evolution Strategies) in the parameter space, which makes them highly sample-inefficient. In this paper, we propose Dynamics-Aware QD-Ext (DA-QD-ext) and Gradient and Dynamics Aware QD (GDA-QD), two model-based Quality-Diversity approaches. They extend existing QD methods to use gradients for efficient exploitation and leverage perturbations in imagination for efficient exploration. Our approach takes advantage of the effectiveness of QD algorithms as good data generators to train deep models and use these models to learn diverse and high-performing populations. We demonstrate that they outperform baseline RL approaches on tasks with deceptive rewards, and maintain the divergent search capabilities of QD approaches while exceeding their performance by ∼ 1.5 times and reaching the same results in 5 times less samples.
AU  - Lim, BWT
AU  - Flageat, M
AU  - Cully, A
DO  - 10.1162/isal_a_00566
EP  - 10
PB  - MIT Press
PY  - 2023///
SP  - 1
TI  - Efficient exploration using model-based quality-diversity with gradients
UR  - http://dx.doi.org/10.1162/isal_a_00566
UR  - http://hdl.handle.net/10044/1/104354
ER  - 