Imperial College London

Dr John Lees

Faculty of Medicine, School of Public Health

Visiting Researcher
 
 
 
//

Contact

 

+44 (0)20 7594 2939 | j.lees | Website

 
 
//

Location

 

UG4, Sir Alexander Fleming Building, South Kensington Campus

//

Summary

 

Publications

Citation

BibTeX format

@comment{
  Panaroo pangenome pipeline paper, 2020 version identified by the DOI below.
  The citation key is kept exactly as exported so existing cite commands keep working.
  NOTE(review): journal is inferred from the 10.1101 DOI prefix, which looks like
  a bioRxiv preprint; confirm, and update journal, volume and pages if the
  peer-reviewed version should be cited instead.
}
@article{Tonkin-Hill:2020:10.1101/2020.01.28.922989,
  author  = {Tonkin-Hill, G. and MacAlasdair, N. and Ruis, C. and Weimann, A. and Horesh, G. and Lees, J. A. and Gladstone, R. A. and Lo, S. and Beaudoin, C. and Floto, R. A. and Frost, S. D. W. and Corander, J. and Bentley, S. D. and Parkhill, J.},
  title   = {Producing Polished Prokaryotic Pangenomes with the {Panaroo} Pipeline},
  journal = {bioRxiv},
  year    = {2020},
  doi     = {10.1101/2020.01.28.922989},
  url     = {https://doi.org/10.1101/2020.01.28.922989},
}

RIS format (EndNote, RefMan)

TY  - JOUR
AB  - Population-level comparisons of prokaryotic genomes must take into account the substantial differences in gene content, resulting from frequent horizontal gene transfer, gene duplication and gene loss. However, the automated annotation of prokaryotic genomes is imperfect, and errors due to fragmented assemblies, contamination, diverse gene families and mis-assemblies accumulate over the population, leading to profound consequences when analysing the set of all genes found in a species. Here we introduce Panaroo, a graph based pangenome clustering tool that is able to account for many of the sources of error introduced during the annotation of prokaryotic genome assemblies. We verified our approach through extensive simulations of de novo assemblies using the infinitely many genes model and by analysing a number of publicly available large bacterial genome datasets. Using a highly clonal Mycobacterium tuberculosis dataset as a negative control case, we show that failing to account for annotation errors can lead to pangenome estimates that are dominated by error. We additionally demonstrate the utility of the improved graphical output provided by Panaroo by performing a pan-genome wide association study in Neisseria gonorrhoeae and by analysing gene gain and loss rates across 51 of the major global pneumococcal sequence clusters. Panaroo is freely available under an open source MIT licence at https://github.com/gtonkinhill/panaroo.
AU  - Tonkin-Hill,G
AU  - MacAlasdair,N
AU  - Ruis,C
AU  - Weimann,A
AU  - Horesh,G
AU  - Lees,JA
AU  - Gladstone,RA
AU  - Lo,S
AU  - Beaudoin,C
AU  - Floto,RA
AU  - Frost,SDW
AU  - Corander,J
AU  - Bentley,SD
AU  - Parkhill,J
DO  - 10.1101/2020.01.28.922989
PY  - 2020///
TI  - Producing Polished Prokaryotic Pangenomes with the Panaroo Pipeline
UR  - http://dx.doi.org/10.1101/2020.01.28.922989
ER  - 