Imperial College London

Dr Stamatia Giannarou

Faculty of Medicine, Department of Surgery & Cancer

Senior Lecturer
 
 
 
//

Contact

 

+44 (0)20 7594 3492 | stamatia.giannarou | Website

 
 
//

Location

 

413, Bessemer Building, South Kensington Campus

//

Summary

 

Publications

Citation

BibTeX format

@comment{Journal article. The ISSN is taken from the RIS record for the same article; the url is the https DOI resolver form of the doi field.}
@article{Tukra:2024:10.1049/htl2.12067,
  author  = {Tukra, S and Xu, H and Xu, C and Giannarou, S},
  title   = {Generalizable stereo depth estimation with masked image modelling},
  journal = {Healthcare Technology Letters},
  volume  = {11},
  pages   = {108--116},
  year    = {2024},
  issn    = {2053-3713},
  doi     = {10.1049/htl2.12067},
  url     = {https://doi.org/10.1049/htl2.12067}
}

RIS format (EndNote, RefMan)

TY  - JOUR
AB  - Generalizable and accurate stereo depth estimation is vital for 3D reconstruction, especially in surgery. Supervised learning methods obtain best performance however, limited ground truth data for surgical scenes limits generalizability. Self-supervised methods don't need ground truth, but suffer from scale ambiguity and incorrect disparity prediction due to inconsistency of photometric loss. This work proposes a two-phase training procedure that is generalizable and retains the high performance of supervised methods. It entails: (1) performing self-supervised representation learning of left and right views via masked image modelling (MIM) to learn generalizable semantic stereo features (2) utilizing the MIM pre-trained model to learn robust depth representation via supervised learning for disparity estimation on synthetic data only. To improve stereo representations learnt via MIM, perceptual loss terms are introduced, which improve the model's stereo representations learnt by explicitly encouraging the learning of higher scene-level features. Qualitative and quantitative performance evaluation on surgical and natural scenes shows that the approach achieves sub-millimetre accuracy and lowest errors respectively, setting a new state-of-the-art. Despite not training on surgical nor natural scene data for disparity estimation.
AU  - Tukra,S
AU  - Xu,H
AU  - Xu,C
AU  - Giannarou,S
DO  - 10.1049/htl2.12067
EP  - 116
PY  - 2024///
SN  - 2053-3713
SP  - 108
TI  - Generalizable stereo depth estimation with masked image modelling
T2  - Healthcare Technology Letters
UR  - http://dx.doi.org/10.1049/htl2.12067
UR  - http://hdl.handle.net/10044/1/109378
VL  - 11
ER  - 