BibTeX format
@comment{Journal article record, cleaned up from a raw repository export.
The first author's two-part surname is kept together as "Roca Barcelo" so it is
not parsed as surname "Roca" with given name "Barcelo"; verify the exact
spelling (hyphenation, accent) against the published article.
The initials "A C" are separated so both survive name abbreviation, and the
proper noun in the title is brace-protected against sentence-casing styles.
The ISSN is taken from the RIS record of the same export.
Volume, pages and DOI are not present in the export; add them when known.}
@article{Roca:2026,
  author  = {Roca Barcelo, A and Schneider, R and Pirani, M and Sebastianelli, A and Piel, F and Vineis, P and Nardocci, A C and Fecht, D},
  title   = {A satellite based machine learning approach for estimating high resolution daily average air temperature in a megacity in {Brazil}},
  journal = {Scientific Reports},
  year    = {2026},
  issn    = {2045-2322},
}
RIS format (EndNote, RefMan)
TY - JOUR
AB - Spatiotemporally resolved ambient temperature data are essential for environmental epidemiology, especially in urban areas where temperature can vary sharply over short distances, influencing population exposure. Additionally, heat distribution often reflects built environment patterns and may correlate with existing social and environmental disparities. Continuous temporal records at high spatial resolution are, however, often lacking, especially in low- and middle-income countries. We developed a generalizable tree-based machine learning approach to estimate daily mean temperatures at 500 x 500 metres resolution using São Paulo, a megacity in Brazil, as a case study, to demonstrate its utility in highly urbanized settings with a heterogeneous urban fabric and unevenly distributed temperature monitoring stations. We trained a Random Forest model using open-access remote sensing data, along with derived products, and temperature measurements from 43 ground stations. To prevent overfitting and select relevant features, we employed a forward feature selection algorithm with target-oriented (spatial) cross-validation. Hyperparameter tuning was performed using a grid search approach. The model was validated through ten-fold station-based cross-validation and an external hold-out dataset. The model demonstrated strong performance (RMSE_RF = 0.80; R²_RF = 0.95), with slightly reduced accuracy in rural areas (R²_rural = 0.91; R²_urban = 0.95). Compared to traditional multilinear approaches (RMSE_MLR = 1.02; R²_MLR = 0.92), the Random Forest model outperformed, likely due to its ability to better capture microclimates and complex relationships between data sources. This 500 x 500 metres daily temperature dataset is the first of its kind in South America, with the São Paulo pipeline and data freely accessible. The approach is adaptable to other regions with appropriate retraining and validation, enabling high-resolution exposure assessments.
AU - Roca Barcelo,A
AU - Schneider,R
AU - Pirani,M
AU - Sebastianelli,A
AU - Piel,F
AU - Vineis,P
AU - Nardocci,AC
AU - Fecht,D
PY - 2026///
SN - 2045-2322
TI - A satellite based machine learning approach for estimating high resolution daily average air temperature in a megacity in Brazil
T2 - Scientific Reports
ER -