BibTeX format
% NOTE(review): @inproceedings requires a booktitle (the proceedings title), which this export omits -- add it once confirmed against the publisher record.
@inproceedings{McKnight:2022,
  author    = {McKnight, S. and Hogg, A. and Neo, V. and Naylor, P.},
  title     = {A study of salient modulation domain features for speaker identification},
  pages     = {705--712},
  publisher = {IEEE},
  year      = {2022},
  url       = {https://ieeexplore.ieee.org/abstract/document/9689293},
}
RIS format (EndNote, RefMan)
TY - CPAPER
AB - This paper studies the ranges of acoustic and modulation frequencies of speech most relevant for identifying speakers and compares the speaker-specific information present in the temporal envelope against that present in the temporal fine structure. This study uses correlation and feature importance measures, random forest and convolutional neural network models, and reconstructed speech signals with specific acoustic and/or modulation frequencies removed to identify the salient points. It is shown that the range of modulation frequencies associated with the fundamental frequency is more important than the 1-16 Hz range most commonly used in automatic speech recognition, and that the 0 Hz modulation frequency band contains significant speaker information. It is also shown that the temporal envelope is more discriminative among speakers than the temporal fine structure, but that the temporal fine structure still contains useful additional information for speaker identification. This research aims to provide a timely addition to the literature by identifying specific aspects of speech relevant for speaker identification that could be used to enhance the discriminant capabilities of machine learning models.
AU - McKnight,S
AU - Hogg,A
AU - Neo,V
AU - Naylor,P
EP - 712
PB - IEEE
PY - 2022///
SP - 705
TI - A study of salient modulation domain features for speaker identification
UR - https://ieeexplore.ieee.org/abstract/document/9689293
UR - http://hdl.handle.net/10044/1/92134
ER -