@comment{
  Yaghootkar et al. (2017), Human Molecular Genetics 26(5), pp. 1018-1030:
  index event bias in large genetic association studies.
  The citation key is kept unchanged so existing citations keep resolving.
  The fields keywords, abstract, language and day are not used by the
  standard BibTeX styles; they are retained as metadata.
}
@article{01a1557944e145b4a2e62dc0b8e0cbdc,
  author    = {Yaghootkar, Hanieh and Bancks, Michael P. and Jones, Sam E. and McDaid, Aaron and Beaumont, Robin and Donnelly, Louise and Wood, Andrew R. and Campbell, Archie and Tyrrell, Jessica and Hocking, Lynne J. and Tuke, Marcus A. and Ruth, Katherine S. and Pearson, Ewan R. and Murray, Anna and Freathy, Rachel M. and Munroe, Patricia B. and Hayward, Caroline and Palmer, Colin and Weedon, Michael N. and Pankow, James S. and Frayling, Timothy M. and Kutalik, Zolt{\'a}n},
  title     = {Quantifying the extent to which index event biases influence large genetic association studies},
  journal   = {Human Molecular Genetics},
  year      = {2017},
  month     = jun,
  day       = {1},
  volume    = {26},
  number    = {5},
  pages     = {1018--1030},
  doi       = {10.1093/hmg/ddw433},
  issn      = {0964-6906},
  publisher = {Oxford University Press},
  language  = {English},
  keywords  = {alleles, hypertension, body mass index procedure, diabetes mellitus, type 2, blood pressure, genetics, stratification, genetic risk, false-positive results, tcf7l2 gene, biobanks},
  abstract  = {As genetic association studies increase in size to 100,000s of individuals, subtle biases may influence conclusions. One possible bias is ``index event bias'' (IEB) that appears due to the stratification by, or enrichment for, disease status when testing associations between genetic variants and a disease-associated trait. We aimed to test the extent to which IEB influences some known trait associations in a range of study designs and provide a statistical framework for assessing future associations. Analysing data from 113,203 non-diabetic UK Biobank participants, we observed three (near TCF7L2, CDKN2AB and CDKAL1) overestimated (BMI-decreasing) and one (near MTNR1B) underestimated (BMI-increasing) associations among 11 type 2 diabetes risk alleles (at $P < 0.05$). IEB became even stronger when we tested a type 2 diabetes genetic risk score composed of these 11 variants ($-0.010$ SDs BMI per allele, $P = 5\times10^{-4}$), which was confirmed in four additional independent studies. Similar results emerged when examining the effect of blood pressure increasing alleles on BMI in normotensive UK Biobank samples. Furthermore, we demonstrated that, under realistic scenarios, common disease alleles would become associated at $p < 5\times10^{-8}$ with disease-related traits through IEB alone, if disease prevalence in the sample differs appreciably from the background population prevalence. For example, some hypertension and type 2 diabetes alleles will be associated with BMI in sample sizes of $>$ 500,000 if the prevalence of those diseases differs by $>$ 10\% from the background population. In conclusion, IEB may result in false positive or negative genetic associations in very large studies stratified or strongly enriched for/against disease cases.},
  note      = {This research has been conducted using the UK Biobank Resource. The authors thank University of Exeter Medical School. EXTEND data were provided by the Peninsula Research Bank, part of the NIHR Exeter Clinical Research Facility. P.B.M. wishes to acknowledge support from the NIHR Cardiovascular Biomedical Research Unit at Barts and The London, Queen Mary University of London, UK. We are grateful to all the participants who took part in the GS:SFHS study, to the general practitioners, to the Scottish School of Primary Care for their help in recruiting the participants, and to the whole team, which includes interviewers, computer and laboratory technicians, clerical workers, research scientists, volunteers, managers, receptionists, and nurses. The Wellcome Trust provides support for Wellcome Trust United Kingdom Type 2 Diabetes Case Control Collection (GoDARTS) and informatics support is provided by the Chief Scientist Office. The Atherosclerosis Risk in Communities Study (ARIC) is carried out as a collaborative study supported by National Heart, Lung, and Blood Institute contracts (HHSN268201100005C, HHSN268201100006C, HHSN268201100007C, HHSN268201100008C, HHSN268201100009C, HHSN268201100010C, HHSN268201100011C, and HHSN268201100012C), R01HL087641, R01HL59367 and R01HL086694; National Human Genome Research Institute contract U01HG004402; and National Institutes of Health contract HHSN268200625226C. The authors thank the staff and participants of the ARIC study for their important contributions. Infrastructure was partly supported by Grant Number UL1RR025005, a component of the National Institutes of Health and NIH Roadmap for Medical Research. H.Y., A.R.W. and T.M.F. are supported by the European Research Council grant: 323195; SZ-245 50371-GLUCOSEGENES-FP7-IDEAS-ERC. S.E.J. is funded by the Medical Research Council (grant: MR/M005070/1). M.A.T., M.N.W. and A.M. are supported by the Wellcome Trust Institutional Strategic Support Award (WT097835MF). R.M.F. is a Sir Henry Dale Fellow (Wellcome Trust and Royal Society grant: 104150/Z/14/Z). R.B. is funded by the Wellcome Trust and Royal Society grant: 104150/Z/14/Z. J.T. is funded by a Diabetes Research and Wellness Foundation Fellowship. Z.K. received financial support from the Leenaards Foundation, the Swiss Institute of Bioinformatics and the Swiss National Science Foundation (31003A-143914) and SystemsX.ch ((40)). The work of M.P.B. was supported by the National Heart, Lung, And Blood Institute of the National Institutes of Health under Award Number T32HL007779. Generation Scotland received core support from the Chief Scientist Office of the Scottish Government Health Directorates [CZD/16/6] and the Scottish Funding Council [HR03006]. E.R.P. holds a WT New investigator award 102820/Z/13/Z.},
}