# Format numbers in scientific notation with one digit after the decimal
# point (e.g. 12345 becomes "1.2e+04"). Returns a character vector.
sci_notation <- function(x) {
  formatC(x, digits = 1, format = "e")
}
# Render a data frame as a styled HTML table inside a scroll box.
#
# df: the data frame to display.
# Returns the object produced by kable() after kable_styling() has been
# applied and the result wrapped in a scroll box 300px high.
kable_table <- function(df) {
  kable(df, "html") %>%
    kable_styling() %>%
    scroll_box(height = "300px")
}
# Make an interactive HTML table from a data frame.
#
# df: data frame to display. It must contain the four effect columns named
#     in the formatStyle() call below, which are coloured by sign.
# Returns the styled table widget.
#
# NOTE(review): the heads of the two calls were missing from this block; they
# are restored here as datatable() and formatStyle(), which is what the
# argument names (rownames/extensions/options and columns/color/fontWeight,
# alongside styleInterval()) correspond to in the DT package -- confirm.
my_data_table <- function(df) {
  datatable(
    df,
    rownames = FALSE,
    autoHideNavigation = TRUE,
    extensions = c("Scroller", "Buttons"),
    options = list(
      dom = "Bfrtip",
      scrollX = TRUE,
      scrollY = 400,
      # Toolbar buttons: page-length picker, column visibility, CSV export,
      # and a landscape A4 PDF export saved under a fixed file name.
      buttons = list(
        "pageLength", "colvis", "csv",
        list(
          extend = "pdf",
          pageSize = "A4",
          orientation = "landscape",
          filename = "GWAS_sig_loci"
        )
      ),
      pageLength = 50
    )
  ) %>%
    formatStyle(
      columns = c(
        "Female early effect", "Male early effect",
        "Female late effect", "Male late effect"
      ),
      # Values up to and including the cut at 0 get the first colour,
      # values above it the second.
      color = styleInterval(cuts = 0, values = c("tomato", "steelblue")),
      fontWeight = "bold"
    )
}
# Open the SQLite annotation database queried by the code below.
db <- DBI::dbConnect(
  drv = RSQLite::SQLite(),
  dbname = "data/derived/annotations.sqlite3"
)

# Significance threshold applied to the raw p-values of each phenotype.
p_cutoff <- 1e-5
# Pull the significant univariate results from the database and annotate
# them: keep variants whose raw p-value is below p_cutoff for at least one of
# the four phenotypes, attach the "variants" table by SNP and gene names by
# FBID, then tidy the column names for display.
univariate_lmm_results <-
  tbl(db, "univariate_lmm_results") %>%
  # Keep only the "raw" columns, dropping their standard-error columns
  select(SNP, SNP_clump, contains("raw"), -contains("SE")) %>%
  filter(
    pvalue_female_early_raw < p_cutoff | pvalue_female_late_raw < p_cutoff |
      pvalue_male_early_raw < p_cutoff | pvalue_male_late_raw < p_cutoff
  ) %>%
  inner_join(tbl(db, "variants"), by = "SNP") %>%
  # NOTE(review): the head of this join was missing from the source. It is
  # restored as left_join() on the assumption that variants without a
  # matching gene should be kept -- confirm against the original analysis.
  left_join(
    tbl(db, "genes") %>%
      select(FBID, gene_name),
    by = "FBID"
  ) %>%
  # Everything above runs in the database; bring all rows into memory here
  collect(n = Inf) %>%
  # Effect sizes become character strings with exactly two decimal places
  mutate_at(vars(contains("beta")), ~ format(round(.x, 2), nsmall = 2)) %>%
  rename_all(~ gsub("beta_", "", .x)) %>%
  rename_all(~ gsub("_raw", "", .x)) %>%
  # NOTE(review): the source ended this pipeline with a dangling `%>%`, which
  # is not valid before the next assignment; a final step may have been lost
  # in extraction -- confirm nothing else belongs after this select().
  select(
    SNP, SNP_clump, MAF, FBID, gene_name, site.class, starts_with("female"),
    starts_with("male"), starts_with("P_"), starts_with("pvalue")
  )
# Build the table for display: one row per variant clump, p-values shown in
# scientific notation, and human-readable column headers.
GWAS_table <- univariate_lmm_results %>%
  split(.$SNP_clump) %>%
  # Within each clump, keep only the variant whose four p-values have the
  # smallest sum
  map_df(function(clump_rows) {
    clump_rows %>%
      mutate(
        sum_p = pvalue_female_early + pvalue_female_late +
          pvalue_male_early + pvalue_male_late
      ) %>%
      arrange(sum_p) %>%
      head(1)
  }) %>%
  select(-sum_p) %>%
  mutate_at(vars(contains("pval")), sci_notation) %>%
  # Blank the clump label when it is just the variant's own name
  mutate(SNP_clump = replace(SNP_clump, SNP == SNP_clump, "")) %>%
  rename(
    Variant = SNP,
    `Variant clump` = SNP_clump,
    `Site class` = site.class,
    Gene = gene_name,
    `Female early effect` = female_early,
    `Female late effect` = female_late,
    `Male early effect` = male_early,
    `Male late effect` = male_late,
    `Female early pval` = pvalue_female_early,
    `Female late pval` = pvalue_female_late,
    `Male early pval` = pvalue_male_early,
    `Male late pval` = pvalue_male_late
  )

# Save the formatted table to disk
write_csv(GWAS_table, "data/derived/GWAS_significant_snps.csv")
This table shows variants (either individual SNPs or indels, or clumps of variants in complete linkage disequilibrium) that passed the statistical significance threshold of \(p < 10^{-5}\) for at least one of the four phenotypes, in a linear mixed model GWAS implemented in GEMMA. Column 3 gives the minor allele frequency, columns 4–5 identify genes that overlap the variant, and column 6 shows the site class. Columns 7–10 show the estimated effect sizes of the variant on the four phenotypes (where positive values mean that the minor allele is associated with higher fitness, and negative values mean that it is associated with lower fitness), and the final columns show the corresponding \(p\)-values.
# Show the table of significant variants as an interactive HTML table
my_data_table(GWAS_table)