DecodeGenetics
diff --git a/‎.Rbuildignore‎
Lines changed: 3 additions & 0 deletions b/‎.Rbuildignore‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎.gitignore‎
Lines changed: 4 additions & 0 deletions b/‎.gitignore‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎DESCRIPTION‎
Lines changed: 21 additions & 0 deletions b/‎DESCRIPTION‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎LICENSE‎
Lines changed: 2 additions & 0 deletions b/‎LICENSE‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎LICENSE.md‎
Lines changed: 21 additions & 0 deletions b/‎LICENSE.md‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎NAMESPACE‎
Lines changed: 28 additions & 0 deletions b/‎NAMESPACE‎
Lines changed: 28 additions & 0 deletions
diff --git a/‎R/Dominance_CaseControl_model.R‎
Lines changed: 77 additions & 0 deletions b/‎R/Dominance_CaseControl_model.R‎
Lines changed: 77 additions & 0 deletions
diff --git a/‎R/Dominance_model.R‎
Lines changed: 60 additions & 0 deletions b/‎R/Dominance_model.R‎
Lines changed: 60 additions & 0 deletions
diff --git a/‎R/Ellipse_by_genotype.R‎
Lines changed: 94 additions & 0 deletions b/‎R/Ellipse_by_genotype.R‎
Lines changed: 94 additions & 0 deletions
diff --git a/‎R/Env_interaction_CaseControl_all_vs_all.R‎
Lines changed: 60 additions & 0 deletions b/‎R/Env_interaction_CaseControl_all_vs_all.R‎
Lines changed: 60 additions & 0 deletions
@@ -0,0 +1,3 @@
+^.*\.Rproj$
+^\.Rproj\.user$
+^LICENSE\.md$
@@ -0,0 +1,4 @@
+.Rproj.user
+.Rhistory
+.RData
+.Ruserdata
@@ -0,0 +1,21 @@
+Package: gnonadd
+Type: Package
+Title: Various Non-Additive Models for Genetic Associations
+Version: 1.0.0
+Authors@R: c(
+     person("Audunn S.", "Snaebjarnarson", , "audunn.snaebjarnarson@decode.is", role = c("aut", "cre")),
+     person("Gudmundur", "Einarsson", , "gudmundur.einarsson2@decode.is", role = c("aut")),
+     person("Daniel F.", "Gudbjartsson", , "daniel.gudbjartsson@decode.is", role = c("aut"))
+     )
+Description: The goal of gnonadd is to simplify workflows in the analysis of non-additive effects of 
+    sequence variants. This includes variance effects, correlation effects, interaction 
+    effects and dominance effects.
+License: MIT + file LICENSE
+Encoding: UTF-8
+LazyData: true
+RoxygenNote: 7.2.1
+Depends: 
+    R (>= 2.10)
+Imports: 
+    ggplot2,
+    MASS
@@ -0,0 +1,2 @@
+YEAR: 2023
+COPYRIGHT HOLDER: deCODE Genetics/AMGEN
@@ -0,0 +1,21 @@
+# MIT License
+
+Copyright (c) 2023 deCODE Genetics/AMGEN
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
@@ -0,0 +1,28 @@
+# Generated by roxygen2: do not edit by hand
+
+export(PRS_creator)
+export(Var.assoc)
+export(VarGS.plot)
+export(Viol.by.gen)
+export(alpha.calc)
+export(alpha.cond)
+export(alpha.continuous.cond)
+export(alpha.multi.est)
+export(corr.calibration)
+export(dominance.calc)
+export(dominance_CC.calc)
+export(ellipse.by.gen)
+export(env_interaction.calc)
+export(env_interaction_CC.calc)
+export(expected.variance.effect)
+export(hist_by_gen)
+export(interaction.calc)
+export(interaction_CC.calc)
+export(kappa_calc)
+export(pairwise_env_int.calc)
+export(pairwise_env_int_CC.calc)
+export(pairwise_int.calc)
+export(pairwise_int_CC.calc)
+export(train_and_impute_PRS)
+export(var.adj)
+export(var.summary)
@@ -0,0 +1,77 @@
+#' Genetic dominance effects on a case control variable
+#'
+#' @description
+#' This function estimates the dominance effect of a genetic variant on a case-control variable.
+#' We apply a logistic regression model to estimate dominance effects.
+#' We include a linear term, coded as 0, 1 and 2 for non-carriers, heterozygotes and homozygous carriers of the effect allele
+#' (the term is mean-centred before the model is fitted).
+#' We also include a dominance term, coded as 1 for homozygous carriers and 0 for others.
+#' Effect size and significance are based on the dominance term.
+#'
+#' @param cc A case control vector, containing 0's and 1's
+#' @param g A vector with (possibly imputed) genotype values. All entries should lie between 0 and 2 (inclusive).
+#' @param yob A numerical vector containing year of birth. If some are unknown they should be marked as -1
+#' @param sex A numerical vector containing sex, coded 0 for males, 1 for females and -1 for unknown
+#' @param round_imputed A boolean variable determining whether imputed genotype values should be rounded to the nearest integer in the analysis
+#' @param covariates A dataframe containing any other covariates that should be used; one column per covariate.
+#'
+#' @returns
+#' A list with the dominance effect (on log-scale) and corresponding standard error, z statistic and p-value.
+#' All four entries are NA (with a warning) when a genotype group is missing or the dominance term cannot be estimated.
+#' @examples
+#' g_vec <- rbinom(100000, 2, 0.3)
+#' cc_vec <- rbinom(100000, 1, 0.1 * (1.2 ^ (g_vec^2)))
+#' res <- dominance_CC.calc(cc_vec, g_vec)
+#' @export
+dominance_CC.calc <- function(cc, g, yob=rep(-1,length(cc)), sex=rep(-1,length(cc)), round_imputed = FALSE, covariates = as.data.frame(matrix(0, nrow = 0, ncol = 0))){
+  #The dominance indicator is always derived from rounded genotypes, even when
+  #the linear term keeps the raw imputed dosages
+  g_rounded <- round(g)
+  if(round_imputed == TRUE){
+    g <- g_rounded
+  }
+  undefined <- list(dominance_effect = NA, standard_error = NA, z = NA, pval = NA)
+
+  #All three genotype groups must be observed. Testing for the values 0, 1 and 2
+  #explicitly (rather than counting unique values) keeps NA or out-of-range
+  #genotypes from masquerading as a genotype group
+  if(!all(0:2 %in% g_rounded)) {
+    warning("Dominance effect undefined. There are no subjects of one or more genotype group.")
+    return(undefined)
+  }
+
+  #Negative years of birth are treated as unknown: known values are mean-centred,
+  #unknown ones are set to 0 and flagged through the no_date indicator
+  no_date <- yob < 0
+  yob[!no_date] <- yob[!no_date] - mean(yob[!no_date])
+  yob[no_date] <- 0
+  sex <- as.factor(sex)
+  g <- g - mean(g)
+  g2 <- as.numeric(g_rounded == 2)
+
+  #We define a dataframe containing all variables that should be considered.
+  #Covariates that are constant are left out
+  Dom_data <- as.data.frame(cbind(cc, g2))
+  Dom_data <- cbind(Dom_data, g)
+  if(sd(yob) > 0) {
+    Dom_data <- cbind(Dom_data, yob)
+  }
+  if(length(unique(no_date)) > 1) {
+    Dom_data <- cbind(Dom_data, no_date)
+  }
+  if(length(unique(sex)) > 1) {
+    Dom_data <- cbind(Dom_data, sex)
+  }
+  if(nrow(covariates) > 0) {
+    Dom_data <- cbind(Dom_data, covariates)
+  }
+
+  #We use logistic regression to estimate the dominance effect.
+  #The coefficient table is computed once and reused
+  l_delta <- glm(cc ~ ., data = Dom_data, family = 'binomial')
+  coef_table <- coef(summary(l_delta))
+  param <- "g2"
+  if(!(param %in% rownames(coef_table))){
+    #The summary table has no row for a coefficient that could not be estimated
+    warning("Singular model matrix")
+    return(undefined)
+  }
+  return(list(dominance_effect = coef_table[param, 1], standard_error = coef_table[param, 2],
+              z = coef_table[param, 3], pval = coef_table[param, 4]))
+}
+
@@ -0,0 +1,60 @@
+#' Genetic dominance effects
+#'
+#' @description
+#' This function estimates the dominance effect of a genetic variant on a quantitative trait.
+#' Nothing fancy here. We apply a simple linear regression model to estimate dominance effects.
+#' We include a linear term, coded as 0, 1 and 2 for non-carriers, heterozygotes and homozygous carriers of the effect allele.
+#' We also include a dominance term, coded as 1 for homozygous carriers and 0 for others.
+#' Effect size and significance are based on the dominance term.
+#'
+#' @param qt A numeric vector
+#' @param g A vector with (possibly imputed) genotype values. All entries should lie between 0 and 2 (inclusive).
+#' @param round_imputed A boolean variable determining whether imputed genotype values should be rounded to the nearest integer in the analysis
+#' @param covariates A dataframe containing any covariates that should be used; one column per covariate.
+#'
+#' @returns
+#' A list with the dominance effect and corresponding standard error, t statistic and p-value.
+#' All four entries are NA (with a warning) when a genotype group is missing or the dominance term cannot be estimated.
+#' @examples
+#' g_vec <- rbinom(100000, 2, 0.3)
+#' qt_vec <- rnorm(100000) + 0.2 * g_vec^2
+#' res <- dominance.calc(qt_vec, g_vec)
+#' @export
+dominance.calc <- function(qt, g, round_imputed = FALSE, covariates = as.data.frame(matrix(0, nrow = 0, ncol = 0))){
+  #The dominance indicator is always derived from rounded genotypes, even when
+  #the linear term keeps the raw imputed dosages
+  g_rounded <- round(g)
+  if(round_imputed == TRUE){
+    g <- g_rounded
+  }
+  undefined <- list(dominance_effect = NA, standard_error = NA, t = NA, pval = NA)
+
+  #All three genotype groups must be observed. Testing for the values 0, 1 and 2
+  #explicitly (rather than counting unique values) keeps NA or out-of-range
+  #genotypes from masquerading as a genotype group
+  if(!all(0:2 %in% g_rounded)) {
+    warning("Dominance effect undefined. There are no subjects of one or more genotype group.")
+    return(undefined)
+  }
+  g2 <- as.numeric(g_rounded == 2)
+
+  #We define a dataframe containing all variables that should be considered
+  Dom_data <- as.data.frame(cbind(qt, g2))
+  Dom_data <- cbind(Dom_data, g)
+  if(nrow(covariates) > 0) {
+    Dom_data <- cbind(Dom_data, covariates)
+  }
+
+  #We use linear regression to estimate the dominance effect.
+  #The coefficient table is computed once and reused
+  l_delta <- lm(qt ~ ., data = Dom_data)
+  coef_table <- coef(summary(l_delta))
+  param <- "g2"
+  if(!(param %in% rownames(coef_table))){
+    #The summary table has no row for a coefficient that could not be estimated
+    warning("Singular model matrix")
+    return(undefined)
+  }
+  return(list(dominance_effect = coef_table[param, 1], standard_error = coef_table[param, 2],
+              t = coef_table[param, 3], pval = coef_table[param, 4]))
+}
@@ -0,0 +1,94 @@
+#' Ellipse best fit plot
+#'
+#' @description
+#' This tool creates a scatter plot along with regression lines. Additionally it finds and plots the best ellipses that fit the data.
+#' For each genotype group (0, 1 and 2 after rounding) the ellipse is centred at the group mean, and its semi-axes are the
+#' principal directions of the group covariance matrix, scaled by the square root of the corresponding eigenvalues.
+#' Arrows from the centre mark the two semi-axes. Groups without observations get no ellipse or arrows.
+#'
+#' @param qt1 A numeric vector.
+#' @param qt2 A numeric vector.
+#' @param g An integer vector. Values are rounded; only the values 0, 1 and 2 form genotype groups.
+#' @param trait_name1 A string. Label of the x axis.
+#' @param trait_name2 A string. Label of the y axis.
+#' @param title A string. Title of the plot.
+#' @param sample_size A positive integer. The maximum number of points drawn per genotype group
+#' (regression lines and ellipses are always based on all data).
+#' @returns
+#' A scatter plot.
+#' @examples
+#' n_val <- 10000L
+#' geno_vec <- c(rep(0, n_val), rep(1, n_val), rep(2, n_val))
+#' qt_g0 <- MASS::mvrnorm(n_val, mu = c(0, 0), Sigma = matrix(c(0.93, 0.88, 0.88, 0.92), ncol = 2))
+#' qt_g1 <- MASS::mvrnorm(n_val, mu = c(0, 0), Sigma = matrix(c(0.98, 0.88, 0.88, 0.90), ncol = 2))
+#' qt_g2 <- MASS::mvrnorm(n_val, mu = c(0, 0), Sigma = matrix(c(1.57, 0.81, 0.81, 0.59), ncol = 2))
+#' qt_vec <- rbind(qt_g0, qt_g1)
+#' qt_vec <- rbind(qt_vec, qt_g2)
+#' res <- ellipse.by.gen(qt_vec[, 1], qt_vec[, 2], geno_vec)
+#' @export ellipse.by.gen
+ellipse.by.gen <- function(qt1, qt2, g, trait_name1 = 'qt trait 1', trait_name2 = 'qt trait 2',
+                        title = '', sample_size = 500) {
+  g <- round(g)
+  D <- as.data.frame(cbind(qt1, qt2, g))
+  colnames(D) <- c('qt1', 'qt2', 'g')
+  D$g_factor <- factor(D$g, levels = 0:2, labels = c('Non-carriers', 'Heterozygotes', 'Homozygotes'))
+
+  #Angles at which every ellipse outline is evaluated
+  angles <- (1:500 / 500) * 2 * pi
+  #Outline/arrow colours for genotype 0, 1 and 2
+  outline_colors <- c('red', 'green', 'blue')
+
+  sampled <- list(D[c(), ])
+  segment_layers <- list()
+  ellipse_layers <- list()
+  for(i in 0:2) {
+    #which() drops NA genotypes instead of turning them into all-NA rows
+    rows <- which(D$g == i)
+    if(length(rows) == 0) {
+      #No subjects in this group: draw nothing rather than a degenerate shape at the origin
+      next
+    }
+    D_temp <- D[rows, ]
+    keep <- sample.int(nrow(D_temp), size = min(sample_size, nrow(D_temp)), replace = FALSE)
+    sampled[[length(sampled) + 1]] <- D_temp[keep, ]
+
+    center <- c(mean(D_temp$qt1), mean(D_temp$qt2))
+    Sigma <- cov(D_temp[, c(1, 2)])
+    if(!all(is.finite(Sigma))) {
+      #E.g. a single observation, or missing trait values; the points are still shown
+      warning("Could not estimate a covariance matrix for genotype ", i, "; its ellipse is omitted.")
+      next
+    }
+    Princip <- eigen(Sigma)
+    #Semi-axes: unit eigenvectors scaled by the square root of their eigenvalue.
+    #pmax guards against eigenvalues that are negative only through rounding error
+    axis_lengths <- sqrt(pmax(Princip$values, 0))
+    axis1 <- Princip$vectors[, 1] * axis_lengths[1]
+    axis2 <- Princip$vectors[, 2] * axis_lengths[2]
+    outline <- data.frame(
+      x = center[1] + axis1[1] * cos(angles) + axis2[1] * sin(angles),
+      y = center[2] + axis1[2] * cos(angles) + axis2[2] * sin(angles)
+    )
+
+    #The sign of an eigenvector is arbitrary. Arrows are oriented so that the first
+    #axis never points towards negative qt1 and the second never towards negative qt2
+    sign1 <- if(Princip$vectors[1, 1] < 0) -1 else 1
+    sign2 <- if(Princip$vectors[2, 2] < 0) -1 else 1
+    tip1 <- center + sign1 * axis1
+    tip2 <- center + sign2 * axis2
+
+    group_color <- outline_colors[i + 1]
+    #annotate() draws each arrow exactly once; a geom_segment() with constant
+    #aesthetics would be recycled over every row of the plot data.
+    #NOTE(review): `size` is kept for line widths as in the original code; newer
+    #ggplot2 releases prefer `linewidth`, but no minimum ggplot2 version is declared
+    segment_layers <- c(segment_layers, list(
+      ggplot2::annotate('segment', x = center[1], y = center[2], xend = tip1[1], yend = tip1[2],
+                        colour = group_color, size = 1, arrow = ggplot2::arrow()),
+      ggplot2::annotate('segment', x = center[1], y = center[2], xend = tip2[1], yend = tip2[2],
+                        colour = group_color, size = 1, arrow = ggplot2::arrow())
+    ))
+    ellipse_layers <- c(ellipse_layers, list(
+      ggplot2::geom_polygon(data = outline, ggplot2::aes(x = x, y = y), inherit.aes = FALSE,
+                            color = group_color, fill = NA, size = 1.5)
+    ))
+  }
+  D_sample <- do.call(rbind, sampled)
+
+  #Layer order: points, regression lines (fitted on all data), arrows, ellipses
+  ggplot2::ggplot(D_sample, ggplot2::aes(x = qt1 , y = qt2 ,color = g_factor))+
+    ggplot2::geom_point()+ggplot2::theme_classic()+
+    ggplot2::geom_smooth(method = 'lm', data = D, se = FALSE, formula = y ~ x) +
+    ggplot2::coord_fixed() +
+    ggplot2::scale_color_manual(values = c('Non-carriers' = '#F8766D', 'Heterozygotes' = '#00BA38', 'Homozygotes' = '#619CFF')) +
+    segment_layers +
+    ellipse_layers +
+    ggplot2::xlab(trait_name1)+ggplot2::ylab(trait_name2) + ggplot2::ggtitle(title)
+}
@@ -0,0 +1,60 @@
+#' Pairwise environmental interaction effects for a case control variable
+#'
+#' @description
+#' Given a set of variants and environmental traits, and a single case control variable, this function calculates the interaction effect of all possible variant-environmental pairs.
+#' Each pair is analysed separately with env_interaction_CC.calc.
+#'
+#' @param cc A numeric vector
+#' @param g A matrix, where each column represents a variant
+#' @param env A matrix, where each column represents an environmental variable
+#' @param yob A numerical vector containing year of birth. If some are unknown they should be marked as -1
+#' @param sex A numerical vector containing sex, coded 0 for males, 1 for females and -1 for unknown
+#' @param round_imputed A boolean variable determining whether imputed genotype values should be rounded to the nearest integer in the analysis.
+#' @param dominance_term A boolean variable determining whether a dominance term for the variant should be included as a covariate in the analysis
+#' @param square_env A boolean variable determining whether the square of the environmental trait should be included as a covariate in the analysis
+#' @param covariates A dataframe containing any other covariates that should be used; one column per covariate
+#' @param variant_names A list of the names of the variants
+#' @param env_names A list of the names of the environmental variables
+#'
+#' @returns
+#' A dataframe with all possible variant-environmental pairs and their estimated interaction effect.
+#' It has one row per pair, ordered by variant and then by environmental variable, with the columns
+#' variant_name, env_name, int_effect, se, z and pval.
+#' @examples
+#' g_vec <- matrix(0, nrow = 100000, ncol = 3)
+#' freqs <- runif(ncol(g_vec), min = 0, max = 1)
+#' env_vec <- matrix(0, nrow = 100000, ncol = 3)
+#' for(i in 1:ncol(g_vec)){
+#'  g_vec[, i] <- rbinom(100000, 2, freqs[i])
+#' }
+#' for( i in 1:ncol(env_vec)){
+#'  env_vec[, i] <- round(runif(100000,min=0,max=6))
+#' }
+#' cc_vec <- rbinom(100000,1,0.1 * (1.05 ^ g_vec[, 1]) * (1.06 ^ env_vec[,1]) * (0.95 ^ g_vec[, 2]) * (1.1^(g_vec[, 1] * env_vec[, 1])))
+#' res <- pairwise_env_int_CC.calc(cc_vec, g_vec, env_vec)
+#' @export
+pairwise_env_int_CC.calc <- function(cc, g, env, yob = rep(-1,length(cc)), sex = rep(-1,length(cc)),
+                                     round_imputed = FALSE, dominance_term = FALSE, square_env = FALSE, covariates = as.data.frame(matrix(0, nrow = 0, ncol = 0)),
+                                     variant_names = paste(rep('variant', ncol(g)), as.character(1:ncol(g)), sep="_"),
+                                     env_names =  paste(rep('env', ncol(env)), as.character(1:ncol(env)), sep="_")){
+  pair_number <- ncol(g) * ncol(env)
+
+  #The result is preallocated with the final column types, so the name columns
+  #do not start out numeric and get coerced on first assignment
+  A <- data.frame(variant_name = character(pair_number), env_name = character(pair_number),
+                  int_effect = numeric(pair_number), se = numeric(pair_number),
+                  z = numeric(pair_number), pval = numeric(pair_number),
+                  stringsAsFactors = FALSE)
+
+  #seq_len() makes both loops empty when a matrix has no columns
+  #(1:ncol(g) would iterate over 1 and 0)
+  counter <- 0
+  for(i in seq_len(ncol(g))) {
+    for(j in seq_len(ncol(env))){
+      counter <- counter + 1
+      A$variant_name[counter] <- variant_names[i]
+      A$env_name[counter] <- env_names[j]
+      res <- env_interaction_CC.calc(cc, g[, i], env[, j], yob = yob, sex = sex,
+                                     round_imputed = round_imputed, dominance_term = dominance_term, square_env = square_env, covariates = covariates )
+      A$int_effect[counter] <- res$interaction_effect
+      A$se[counter] <- res$standard_error
+      A$z[counter] <- res$z
+      A$pval[counter] <- res$pval
+    }
+  }
+  return(A)
+}
+
+
+
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,3 @@`
	`1`	`+^.*\.Rproj$`
	`2`	`+^\.Rproj\.user$`
	`3`	`+^LICENSE\.md$`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+YEAR: 2023`
	`2`	`+COPYRIGHT HOLDER: deCODE Genetics/AMGEN`