Skip to content

Commit 3bc4179

Browse files
committed
Initial commit for github.
0 parents  commit 3bc4179

70 files changed

Lines changed: 3418 additions & 0 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.Rbuildignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
^.*\.Rproj$
2+
^\.Rproj\.user$
3+
^LICENSE\.md$

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
.Rproj.user
2+
.Rhistory
3+
.RData
4+
.Ruserdata

DESCRIPTION

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
Package: gnonadd
2+
Type: Package
3+
Title: Various Non-Additive Models for Genetic Associations
4+
Version: 1.0.0
5+
Authors@R: c(
6+
person("Audunn S.", "Snaebjarnarson", , "audunn.snaebjarnarson@decode.is", role = c("aut", "cre")),
7+
person("Gudmundur", "Einarsson", , "gudmundur.einarsson2@decode.is", role = c("aut")),
8+
person("Daniel F.", "Gudbjartsson", , "daniel.gudbjartsson@decode.is", role = c("aut"))
9+
)
10+
Description: The goal of gnonadd is to simplify workflows in the analysis of non-additive effects of
11+
sequence variants. This includes variance effects, correlation effects, interaction
12+
effects and dominance effects.
13+
License: MIT + file LICENSE
14+
Encoding: UTF-8
15+
LazyData: true
16+
RoxygenNote: 7.2.1
17+
Depends:
18+
R (>= 2.10)
19+
Imports:
20+
ggplot2,
21+
MASS

LICENSE

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
YEAR: 2023
2+
COPYRIGHT HOLDER: deCODE Genetics/AMGEN

LICENSE.md

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# MIT License
2+
3+
Copyright (c) 2023 deCODE Genetics/AMGEN
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.

NAMESPACE

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
# Generated by roxygen2: do not edit by hand
2+
3+
export(PRS_creator)
4+
export(Var.assoc)
5+
export(VarGS.plot)
6+
export(Viol.by.gen)
7+
export(alpha.calc)
8+
export(alpha.cond)
9+
export(alpha.continuous.cond)
10+
export(alpha.multi.est)
11+
export(corr.calibration)
12+
export(dominance.calc)
13+
export(dominance_CC.calc)
14+
export(ellipse.by.gen)
15+
export(env_interaction.calc)
16+
export(env_interaction_CC.calc)
17+
export(expected.variance.effect)
18+
export(hist_by_gen)
19+
export(interaction.calc)
20+
export(interaction_CC.calc)
21+
export(kappa_calc)
22+
export(pairwise_env_int.calc)
23+
export(pairwise_env_int_CC.calc)
24+
export(pairwise_int.calc)
25+
export(pairwise_int_CC.calc)
26+
export(train_and_impute_PRS)
27+
export(var.adj)
28+
export(var.summary)

R/Dominance_CaseControl_model.R

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#' Genetic dominance effects on a case control variable
2+
#'
3+
#' @description
4+
#' This function estimates the dominance effect of a genetic variant on a case-control variable
5+
#' We apply a logistic regression model to estimate dominance effects.
6+
#' We include a linear term, coded as 0,1 and 2 for non-carriers, heterozygotes and homozygous carriers of the effect allele.
7+
#' We also include a dominance term, coded as 1 for homozygous carriers and 0 for others.
8+
#' Effect size and significance is based on the dominance term.
9+
#'
10+
#' @param cc A case control vector, containing 0's and 1's
11+
#' @param g A vector with (possibly imputed) genotype values. All entries should be larger than 0 and smaller than 2.
12+
#' @param yob A numerical vector containing year of birth. If some are unknown they should be marked as -1
13+
#' @param sex A numerical vector containing sex, coded 0 for males, 1 for females and -1 for unknown
14+
#' @param round_imputed A boolian variable determining whether imputed genotype values should be rounded to the nearest integer in the analysis
15+
#' @param covariates A dataframe containing any other covariates that should be used; one column per covariate.
16+
#'
17+
#' @returns
18+
#' A list with the dominanc effect (on log-scale) and corresponding standard error, z statistic and p-value
19+
#' @examples
20+
#' g_vec <- rbinom(100000, 2, 0.3)
21+
#' cc_vec <- rbinom(100000, 1, 0.1 * (1.2 ^ (g_vec^2)))
22+
#' res <- dominance_CC.calc(cc_vec, g_vec)
23+
#' @export
24+
dominance_CC.calc <- function(cc, g, yob=rep(-1,length(cc)), sex=rep(-1,length(cc)), round_imputed = F, covariates = as.data.frame(matrix(0, nrow = 0, ncol = 0))){
25+
g_rounded <- round(g)
26+
if(round_imputed == T){
27+
g <- round(g)
28+
}
29+
if(length(unique(as.factor(g_rounded))) < 3) {
30+
warning("Dominance effect undefined. There are no subjects of one or more genotype group.")
31+
delta <- NA
32+
se <- NA
33+
z <- NA
34+
p <- NA
35+
} else {
36+
no_date <- yob < 0
37+
yob[!no_date] <- yob[!no_date] - mean(yob[!no_date])
38+
yob[no_date] <- 0
39+
sex <- as.factor(sex)
40+
g <- g - mean(g)
41+
g2 <- as.numeric(g_rounded == 2)
42+
43+
#We define a dataframe containing all variables that should be considered
44+
Dom_data <- as.data.frame(cbind(cc, g2))
45+
Dom_data <- cbind(Dom_data, g)
46+
if(sd(yob) > 0) {
47+
Dom_data <- cbind(Dom_data, yob)
48+
}
49+
if(length(unique(no_date)) > 1) {
50+
Dom_data <- cbind(Dom_data, no_date)
51+
}
52+
if(length(unique(sex)) > 1) {
53+
Dom_data <- cbind(Dom_data, sex)
54+
}
55+
if(nrow(covariates) > 0) {
56+
Dom_data <- cbind(Dom_data, covariates)
57+
}
58+
59+
#We use logistic regression to estimate the dominance effect
60+
l_delta <- glm(cc ~ ., data = Dom_data, family = 'binomial')
61+
param <- "g2"
62+
if(param %in% rownames(coef(summary(l_delta)))){
63+
delta <- summary(l_delta)$coeff[param, 1]
64+
se <- summary(l_delta)$coeff[param, 2]
65+
z <- summary(l_delta)$coeff[param, 3]
66+
p <- summary(l_delta)$coeff[param, 4]
67+
}else{
68+
warning("Singular model matrix")
69+
delta <- NA
70+
se <- NA
71+
z <- NA
72+
p <- NA
73+
}
74+
}
75+
return(list(dominance_effect = delta, standard_error = se, z = z, pval = p))
76+
}
77+

R/Dominance_model.R

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#' Genetic dominance effects
2+
#'
3+
#' @description
4+
#' This function estimates the dominance effect of a genetic variant on a quantitatvie trait
5+
#' Nothing fancy here. We apply a simple linear regression model to estimate dominance effects.
6+
#' We include a linear term, coded as 0,1 and 2 for non-carriers, heterozygotes and homozygous carriers of the effect allele.
7+
#' We also include a dominance term, coded as 1 for homozygous carriers and 0 for others.
8+
#' Effect size and significance is based on the dominance term.
9+
#'
10+
#' @param qt A numeric vector
11+
#' @param g A vector with (possibly imputed) genotype values. All entries should be larger than 0 and smaller than 2.
12+
#' @param round_imputed A boolian variable determining whether imputed genotype values should be rounded to the nearest integer in the analysis
13+
#' @param covariates A dataframe containing any covariates that should be used; one column per covariate.
14+
#'
15+
#' @returns
16+
#' A list with the dominanc effect and corresponding standard error, t statistic and p-value
17+
#' @examples
18+
#' g_vec <- rbinom(100000, 2, 0.3)
19+
#' qt_vec <- rnorm(100000) + 0.2 * g_vec^2
20+
#' res <- dominance.calc(qt_vec, g_vec)
21+
#' @export
22+
dominance.calc <- function(qt, g, round_imputed = F, covariates = as.data.frame(matrix(0, nrow = 0, ncol = 0))){
23+
g_rounded <- round(g)
24+
if(round_imputed == T){
25+
g <- round(g)
26+
}
27+
if(length(unique(as.factor(g_rounded))) < 3) {
28+
warning("Dominance effect undefined. There are no subjects of one or more genotype group.")
29+
delta <- NA
30+
se <- NA
31+
t <- NA
32+
p <- NA
33+
}else {
34+
g2 <- as.numeric(g_rounded == 2)
35+
36+
#We define a dataframe containing all variables that should be considered
37+
Dom_data <- as.data.frame(cbind(qt, g2))
38+
Dom_data <- cbind(Dom_data, g)
39+
if(nrow(covariates) > 0) {
40+
Dom_data <- cbind(Dom_data, covariates)
41+
}
42+
43+
#We use linear regression to estimate the dominance effect
44+
l_delta <- lm(qt ~ ., data = Dom_data)
45+
param <- "g2"
46+
if(param %in% rownames(coef(summary(l_delta)))){
47+
delta <- summary(l_delta)$coeff[param, 1]
48+
se <- summary(l_delta)$coeff[param, 2]
49+
t <- summary(l_delta)$coeff[param, 3]
50+
p <- summary(l_delta)$coeff[param, 4]
51+
}else{
52+
warning("Singular model matrix")
53+
delta <- NA
54+
se <- NA
55+
t <- NA
56+
p <- NA
57+
}
58+
}
59+
return(list(dominance_effect = delta, standard_error = se, t = t, pval = p))
60+
}

R/Ellipse_by_genotype.R

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
#' Ellipse best fit plot
2+
#'
3+
#' @description
4+
#' This tool creates a scatter plot along with regression lines. Additionally it finds and plots the best ellipses that fit the data.
5+
#'
6+
#' @param qt1 A numeric vector.
7+
#' @param qt2 A numeric vector.
8+
#' @param g An integer vector.
9+
#' @param trait_name1 A string.
10+
#' @param trait_name2 A string.
11+
#' @param title A string.
12+
#' @param sample_size A positive integer.
13+
#' @returns
14+
#' A scatter plot.
15+
#' @examples
16+
#' n_val <- 10000L
17+
#' geno_vec <- c(rep(0, n_val), rep(1, n_val), rep(2, n_val))
18+
#' qt_g0 <- MASS::mvrnorm(n_val, mu = c(0, 0), Sigma = matrix(c(0.93, 0.88, 0.88, 0.92), ncol = 2))
19+
#' qt_g1 <- MASS::mvrnorm(n_val, mu = c(0, 0), Sigma = matrix(c(0.98, 0.88, 0.88, 0.90), ncol = 2))
20+
#' qt_g2 <- MASS::mvrnorm(n_val, mu = c(0, 0), Sigma = matrix(c(1.57, 0.81, 0.81, 0.59), ncol = 2))
21+
#' qt_vec <- rbind(qt_g0, qt_g1)
22+
#' qt_vec <- rbind(qt_vec, qt_g2)
23+
#' res <- ellipse.by.gen(qt_vec[, 1], qt_vec[, 2], geno_vec)
24+
#' @export ellipse.by.gen
25+
ellipse.by.gen <- function(qt1, qt2, g, trait_name1 = 'qt trait 1', trait_name2 = 'qt trait 2',
26+
title = '', sample_size = 500) {
27+
g <- round(g)
28+
D <- cbind(qt1, qt2)
29+
D <- cbind(D, g)
30+
D <- as.data.frame(D)
31+
colnames(D) <- c('qt1', 'qt2', 'g')
32+
D$g_factor <- factor(D$g, levels = 0:2, labels = c('Non-carriers', 'Heterozygotes', 'Homozygotes'))
33+
M <- as.data.frame(matrix(0,500,7))
34+
colnames(M) <- c('t','x0','y0','x1','y1','x2','y2')
35+
M$t <- (1:500/500)*2*pi
36+
D_sample <- D[c(),]
37+
Arrow_data <- as.data.frame(matrix(0,6,4))
38+
colnames(Arrow_data) <- c('start_x', 'start_y', 'end_x', 'end_y')
39+
for(i in 0:2) {
40+
D_temp <- D[D$g == i, ]
41+
if(nrow(D_temp) > 0) {
42+
D_sample <- rbind(D_sample, D_temp[sample(1:nrow(D_temp), size = min(sample_size, nrow(D_temp)), replace = FALSE), ])
43+
qt1_mean <- mean(D_temp$qt1)
44+
qt2_mean <- mean(D_temp$qt2)
45+
Sigma <- cov(D_temp[, c(1, 2)])
46+
Princip <- eigen(Sigma)
47+
flip_direction1 <- 0
48+
flip_direction2 <- 0
49+
if(Princip$vectors[1, 1] < 0){
50+
flip_direction1 <- 1
51+
}
52+
if(Princip$vectors[2, 2] < 0){
53+
flip_direction2 <- 1
54+
}
55+
M[,2 + 2 * i] <- qt1_mean + Princip$vectors[1, 1] * sqrt(Princip$values[1]) * cos(M$t) + Princip$vectors[1, 2] * sqrt(Princip$values[2]) * sin(M$t)
56+
M[,3 + 2 * i] <- qt2_mean + Princip$vectors[2, 1] * sqrt(Princip$values[1]) * cos(M$t) + Princip$vectors[2, 2] * sqrt(Princip$values[2]) * sin(M$t)
57+
Arrow_data[i + 1, 1] <- qt1_mean
58+
Arrow_data[i + 1, 2] <- qt2_mean
59+
Arrow_data[i + 1, 3] <- qt1_mean + (-1)^flip_direction1 * Princip$vectors[1,1] * sqrt(Princip$values[1])
60+
Arrow_data[i + 1, 4] <- qt2_mean + (-1)^flip_direction1 * Princip$vectors[2,1] * sqrt(Princip$values[1])
61+
Arrow_data[i + 4, 1] <- qt1_mean
62+
Arrow_data[i + 4, 2] <- qt2_mean
63+
Arrow_data[i + 4, 3] <- qt1_mean + (-1)^flip_direction2 * Princip$vectors[1,2] * sqrt(Princip$values[2])
64+
Arrow_data[i + 4, 4] <- qt2_mean + (-1)^flip_direction2 * Princip$vectors[2,2] * sqrt(Princip$values[2])
65+
}
66+
}
67+
ggplot2::ggplot(D_sample, ggplot2::aes(x = qt1 , y = qt2 ,color = g_factor))+
68+
ggplot2::geom_point()+ggplot2::theme_classic()+
69+
ggplot2::geom_smooth(method = 'lm', data = D, se = F, formula = as.formula('y ~ x')) +
70+
ggplot2::coord_fixed() +
71+
ggplot2::scale_color_manual(values = c('Non-carriers' = '#F8766D', 'Heterozygotes' = '#00BA38', 'Homozygotes' = '#619CFF')) +
72+
ggplot2::geom_segment(ggplot2::aes(x = Arrow_data[1, 1], y = Arrow_data[1, 2],
73+
xend = Arrow_data[1, 3], yend = Arrow_data[1, 4] ),
74+
color = 'red', size = 1, arrow = ggplot2::arrow()) +
75+
ggplot2::geom_segment(ggplot2::aes(x = Arrow_data[4, 1], y = Arrow_data[4, 2],
76+
xend = Arrow_data[4, 3], yend = Arrow_data[4, 4] ),
77+
color = 'red', size = 1, arrow = ggplot2::arrow()) +
78+
ggplot2::geom_segment(ggplot2::aes(x = Arrow_data[2, 1], y = Arrow_data[2, 2],
79+
xend = Arrow_data[2, 3], yend = Arrow_data[2, 4] ),
80+
color = 'green', size = 1, arrow = ggplot2::arrow()) +
81+
ggplot2::geom_segment(ggplot2::aes(x = Arrow_data[5, 1], y = Arrow_data[5, 2],
82+
xend = Arrow_data[5, 3], yend = Arrow_data[5, 4] ),
83+
color = 'green', size = 1, arrow = ggplot2::arrow()) +
84+
ggplot2::geom_segment(ggplot2::aes(x = Arrow_data[3, 1], y = Arrow_data[3, 2],
85+
xend = Arrow_data[3, 3], yend = Arrow_data[3, 4] ),
86+
color = 'blue', size = 1, arrow = ggplot2::arrow()) +
87+
ggplot2::geom_segment(ggplot2::aes(x = Arrow_data[6, 1], y = Arrow_data[6, 2],
88+
xend = Arrow_data[6, 3], yend = Arrow_data[6, 4] ),
89+
color = 'blue', size = 1, arrow = ggplot2::arrow()) +
90+
ggplot2::geom_polygon(data=M, ggplot2::aes(x=x0,y=y0), color='red',fill=NA,size=1.5) +
91+
ggplot2::geom_polygon(data=M, ggplot2::aes(x=x1,y=y1), color='green',fill=NA,size=1.5) +
92+
ggplot2::geom_polygon(data=M, ggplot2::aes(x=x2,y=y2), color='blue',fill=NA,size=1.5) +
93+
ggplot2::xlab(trait_name1)+ggplot2::ylab(trait_name2) + ggplot2::ggtitle(title)
94+
}
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#' Pairwise environmental interaction effects for a case control variable
2+
#'
3+
#' @description
4+
#' Given a set of variants and environmental traits, and a single case control variable, this function calculates the interaction effect of all possible variant-environmental pairs
5+
#'
6+
#' @param cc A numeric vector
7+
#' @param g A matrix, where each colomn represents a variant
8+
#' @param env A matrix, where each row represents an environmental variable
9+
#' @param yob A numerical vector containing year of birth. If some are unknown they should be marked as -1
10+
#' @param sex A numerical vector containing sex, coded 0 for males, 1 for females and -1 for unknown
11+
#' @param round_imputed A boolian variable determining whether imputed genotype values should be rounded to the nearest integer in the analysis.
12+
#' @param dominance_term A boolian variable determining whether a dominance term for the variant should be included as a covariates in the analysis
13+
#' @param square_env A boolian variable determining whether the square of the environmental trait should be included as a covariate in the analysis
14+
#' @param covariates A dataframe containing any other covariates that should be used; one column per covariate
15+
#' @param variant_names A list of the names of the variants
16+
#' @param env_names A list of the names of the environmental variables
17+
#'
18+
#' @returns
19+
#' A dataframe with all possible variant-environmental pairs and their estimated interaction effect
20+
#' @examples
21+
#' g_vec <- matrix(0, nrow = 100000, ncol = 3)
22+
#' freqs <- runif(ncol(g_vec), min = 0, max = 1)
23+
#' env_vec <- matrix(0, nrow = 100000, ncol = 3)
24+
#' for(i in 1:ncol(g_vec)){
25+
#' g_vec[, i] <- rbinom(100000, 2, freqs[i])
26+
#' }
27+
#' for( i in 1:ncol(env_vec)){
28+
#' env_vec[, i] <- round(runif(100000,min=0,max=6))
29+
#' }
30+
#' cc_vec <- rbinom(100000,1,0.1 * (1.05 ^ g_vec[, 1]) * (1.06 ^ env_vec[,1]) * (0.95 ^ g_vec[, 2]) * (1.1^(g_vec[, 1] * env_vec[, 1])))
31+
#' res <- pairwise_env_int_CC.calc(cc_vec, g_vec, env_vec)
32+
#' @export
33+
pairwise_env_int_CC.calc <- function(cc, g, env, yob = rep(-1,length(cc)), sex = rep(-1,length(cc)),
34+
round_imputed = F, dominance_term = F, square_env = F, covariates = as.data.frame(matrix(0, nrow = 0, ncol = 0)),
35+
variant_names = paste(rep('variant', ncol(g)), as.character(1:ncol(g)), sep="_"),
36+
env_names = paste(rep('env', ncol(env)), as.character(1:ncol(env)), sep="_")){
37+
pair_number <- ncol(g) * ncol(env)
38+
A <- data.frame(matrix(0, nrow = pair_number, ncol=6))
39+
colnames(A) <- c('variant_name', 'env_name', 'int_effect', 'se', 'z', 'pval')
40+
41+
counter <- 0
42+
for(i in 1:ncol(g)) {
43+
for(j in 1:ncol(env)){
44+
counter <- counter + 1
45+
A$variant_name[counter] <- variant_names[i]
46+
A$env_name[counter] <- env_names[j]
47+
res <- env_interaction_CC.calc(cc, g[, i], env[, j], yob = yob, sex = sex,
48+
round_imputed = round_imputed, dominance_term = dominance_term, square_env = square_env, covariates = covariates )
49+
A$int_effect[counter] <- res$interaction_effect
50+
A$se[counter] <- res$standard_error
51+
A$z[counter] <- res$z
52+
A$pval[counter] <- res$pval
53+
54+
}
55+
}
56+
return(A)
57+
}
58+
59+
60+

0 commit comments

Comments
 (0)