BibTeX format

@inproceedings{beckmann2021wordlevel,
  author    = {Beckmann, P. and Kegler, M. and Cernak, M.},
  title     = {Word-level embeddings for cross-task transfer learning in speech processing},
  booktitle = {29th European Signal Processing Conference ({EUSIPCO})},
  publisher = {IEEE},
  year      = {2021},
  doi       = {10.23919/eusipco54536.2021.9616254},
}

RIS format (EndNote, RefMan)

TY - CONF
AB - Recent breakthroughs in deep learning often rely on representation learning and knowledge transfer. In recent years, unsupervised and self-supervised techniques for learning speech representation were developed to foster automatic speech recognition. Up to date, most of these approaches are task-specific and designed for within-task transfer learning between different datasets or setups of a particular task. In turn, learning task-independent representation of speech and cross-task applications of transfer learning remain less common. Here, we introduce an encoder capturing word-level representations of speech for cross-task transfer learning. We demonstrate the application of the pre-trained encoder in four distinct speech and audio processing tasks: (i) speech enhancement, (ii) language identification, (iii) speech, noise, and music classification, and (iv) speaker identification. In each task, we compare the performance of our cross-task transfer learning approach to task-specific baselines. Our results show that the speech representation captured by the encoder through the pre-training is transferable across distinct speech processing tasks and datasets. Notably, even simple applications of our pre-trained encoder outperformed task-specific methods, or were comparable, depending on the task.
AU - Beckmann, P
AU - Kegler, M
AU - Cernak, M
DO - 10.23919/eusipco54536.2021.9616254
PY - 2021///
TI - Word-level embeddings for cross-task transfer learning in speech processing
ER -