% Journal article record (Nature Machine Intelligence, vol. 5, 2023, pp. 681--686).
% Author names are stored uniformly as "Last, First" so any style can parse and
% abbreviate them. The funding/copyright statement lives in `annote` rather than
% `note` so that it is not printed as part of the formatted reference.
@article{7ae621e58fec43f892c4cef1533cb6f5,
  author    = {Dittmer, S{\"o}ren and Roberts, Michael and Gilbey, Julian and Biguri, Ander and Preller, Jacobus and Rudd, James H. F. and Aston, John A. D. and Sch{\"o}nlieb, Carola-Bibiane and Jefferson, Emily},
  title     = {Navigating the development challenges in creating complex data systems},
  journal   = {Nature Machine Intelligence},
  volume    = {5},
  pages     = {681--686},
  year      = {2023},
  month     = jun,
  day       = {1},
  doi       = {10.1038/s42256-023-00665-x},
  issn      = {2522-5839},
  publisher = {Springer International Publishing},
  language  = {English},
  keywords  = {Applied mathematics, Software},
  abstract  = {Data science systems (DSSs) are a fundamental tool in many areas of research and are now being developed by people with a myriad of backgrounds. This is coupled with a crisis in the reproducibility of such DSSs, despite the wide availability of powerful tools for data science and machine learning over the past decade. We believe that perverse incentives and a lack of widespread software engineering skills are among the many causes of this crisis and analyse why software engineering and building large complex systems is, in general, hard. Based on these insights, we identify how software engineering addresses those difficulties and how one might apply and generalize software engineering methods to make DSSs more fit for purpose. We advocate two key development philosophies: one should incrementally grow---not plan then build---DSSs, and one should use two types of feedback loop during development---one that tests the code{\textquoteright}s correctness and another that evaluates the code{\textquoteright}s efficacy.},
  annote    = {Funding Information: We are grateful to the EU/EFPIA Innovative Medicines Initiative project DRAGON (101005122; S.D. and M.R., AIX-COVNET, C.-B.S.), Trinity Challenge BloodCounts! project (M.R., J.G. and C.-B.S.), EPSRC Cambridge Mathematics of Information in Healthcare Hub EP/T017961/1 (M.R., J.H.F.R., J.A.D.A. and C.-B.S.), Cantab Capital Institute for the Mathematics of Information (C.-B.S.), the European Research Council for Horizon 2020 grant no. 777826 (C.-B.S.), the Alan Turing Institute (C.-B.S.), the Wellcome Trust (J.H.F.R.), Cancer Research UK Cambridge Centre (C9685/A25177; C.-B.S.), the British Heart Foundation (J.H.F.R.), NIHR Cambridge Biomedical Research Centre (J.H.F.R.), HEFCE (J.H.F.R.), Leverhulme Trust project on {\textquoteleft}Breaking the non-convexity barrier{\textquoteright} (C.-B.S.), the Philip Leverhulme Prize (C.-B.S.), EPSRC grants EP/S026045/1 and EP/T003553/1 (C.-B.S.) and the Wellcome Innovator Award RG98755 (C.-B.S.). We are also grateful to Intel for financial support, I. Selby for creative input, and J.-C. Lohmann, S. Griffith, J. Tang and F. Zhang for comments and discussions. Copyright: {\textcopyright} 2023, Springer Nature Limited.},
}