|
| 1 | +#' Ellipse best fit plot |
| 2 | +#' |
| 3 | +#' @description |
| 4 | +#' This tool creates a scatter plot along with regression lines. Additionally it finds and plots the best ellipses that fit the data. |
| 5 | +#' |
| 6 | +#' @param qt1 A numeric vector. |
| 7 | +#' @param qt2 A numeric vector. |
| 8 | +#' @param g An integer vector. |
| 9 | +#' @param trait_name1 A string. |
| 10 | +#' @param trait_name2 A string. |
| 11 | +#' @param title A string. |
| 12 | +#' @param sample_size A positive integer. |
| 13 | +#' @returns |
| 14 | +#' A scatter plot. |
| 15 | +#' @examples |
| 16 | +#' n_val <- 10000L |
| 17 | +#' geno_vec <- c(rep(0, n_val), rep(1, n_val), rep(2, n_val)) |
| 18 | +#' qt_g0 <- MASS::mvrnorm(n_val, mu = c(0, 0), Sigma = matrix(c(0.93, 0.88, 0.88, 0.92), ncol = 2)) |
| 19 | +#' qt_g1 <- MASS::mvrnorm(n_val, mu = c(0, 0), Sigma = matrix(c(0.98, 0.88, 0.88, 0.90), ncol = 2)) |
| 20 | +#' qt_g2 <- MASS::mvrnorm(n_val, mu = c(0, 0), Sigma = matrix(c(1.57, 0.81, 0.81, 0.59), ncol = 2)) |
| 21 | +#' qt_vec <- rbind(qt_g0, qt_g1) |
| 22 | +#' qt_vec <- rbind(qt_vec, qt_g2) |
| 23 | +#' res <- ellipse.by.gen(qt_vec[, 1], qt_vec[, 2], geno_vec) |
| 24 | +#' @export ellipse.by.gen |
| 25 | +ellipse.by.gen <- function(qt1, qt2, g, trait_name1 = 'qt trait 1', trait_name2 = 'qt trait 2', |
| 26 | + title = '', sample_size = 500) { |
| 27 | + g <- round(g) |
| 28 | + D <- cbind(qt1, qt2) |
| 29 | + D <- cbind(D, g) |
| 30 | + D <- as.data.frame(D) |
| 31 | + colnames(D) <- c('qt1', 'qt2', 'g') |
| 32 | + D$g_factor <- factor(D$g, levels = 0:2, labels = c('Non-carriers', 'Heterozygotes', 'Homozygotes')) |
| 33 | + M <- as.data.frame(matrix(0,500,7)) |
| 34 | + colnames(M) <- c('t','x0','y0','x1','y1','x2','y2') |
| 35 | + M$t <- (1:500/500)*2*pi |
| 36 | + D_sample <- D[c(),] |
| 37 | + Arrow_data <- as.data.frame(matrix(0,6,4)) |
| 38 | + colnames(Arrow_data) <- c('start_x', 'start_y', 'end_x', 'end_y') |
| 39 | + for(i in 0:2) { |
| 40 | + D_temp <- D[D$g == i, ] |
| 41 | + if(nrow(D_temp) > 0) { |
| 42 | + D_sample <- rbind(D_sample, D_temp[sample(1:nrow(D_temp), size = min(sample_size, nrow(D_temp)), replace = FALSE), ]) |
| 43 | + qt1_mean <- mean(D_temp$qt1) |
| 44 | + qt2_mean <- mean(D_temp$qt2) |
| 45 | + Sigma <- cov(D_temp[, c(1, 2)]) |
| 46 | + Princip <- eigen(Sigma) |
| 47 | + flip_direction1 <- 0 |
| 48 | + flip_direction2 <- 0 |
| 49 | + if(Princip$vectors[1, 1] < 0){ |
| 50 | + flip_direction1 <- 1 |
| 51 | + } |
| 52 | + if(Princip$vectors[2, 2] < 0){ |
| 53 | + flip_direction2 <- 1 |
| 54 | + } |
| 55 | + M[,2 + 2 * i] <- qt1_mean + Princip$vectors[1, 1] * sqrt(Princip$values[1]) * cos(M$t) + Princip$vectors[1, 2] * sqrt(Princip$values[2]) * sin(M$t) |
| 56 | + M[,3 + 2 * i] <- qt2_mean + Princip$vectors[2, 1] * sqrt(Princip$values[1]) * cos(M$t) + Princip$vectors[2, 2] * sqrt(Princip$values[2]) * sin(M$t) |
| 57 | + Arrow_data[i + 1, 1] <- qt1_mean |
| 58 | + Arrow_data[i + 1, 2] <- qt2_mean |
| 59 | + Arrow_data[i + 1, 3] <- qt1_mean + (-1)^flip_direction1 * Princip$vectors[1,1] * sqrt(Princip$values[1]) |
| 60 | + Arrow_data[i + 1, 4] <- qt2_mean + (-1)^flip_direction1 * Princip$vectors[2,1] * sqrt(Princip$values[1]) |
| 61 | + Arrow_data[i + 4, 1] <- qt1_mean |
| 62 | + Arrow_data[i + 4, 2] <- qt2_mean |
| 63 | + Arrow_data[i + 4, 3] <- qt1_mean + (-1)^flip_direction2 * Princip$vectors[1,2] * sqrt(Princip$values[2]) |
| 64 | + Arrow_data[i + 4, 4] <- qt2_mean + (-1)^flip_direction2 * Princip$vectors[2,2] * sqrt(Princip$values[2]) |
| 65 | + } |
| 66 | + } |
| 67 | + ggplot2::ggplot(D_sample, ggplot2::aes(x = qt1 , y = qt2 ,color = g_factor))+ |
| 68 | + ggplot2::geom_point()+ggplot2::theme_classic()+ |
| 69 | + ggplot2::geom_smooth(method = 'lm', data = D, se = F, formula = as.formula('y ~ x')) + |
| 70 | + ggplot2::coord_fixed() + |
| 71 | + ggplot2::scale_color_manual(values = c('Non-carriers' = '#F8766D', 'Heterozygotes' = '#00BA38', 'Homozygotes' = '#619CFF')) + |
| 72 | + ggplot2::geom_segment(ggplot2::aes(x = Arrow_data[1, 1], y = Arrow_data[1, 2], |
| 73 | + xend = Arrow_data[1, 3], yend = Arrow_data[1, 4] ), |
| 74 | + color = 'red', size = 1, arrow = ggplot2::arrow()) + |
| 75 | + ggplot2::geom_segment(ggplot2::aes(x = Arrow_data[4, 1], y = Arrow_data[4, 2], |
| 76 | + xend = Arrow_data[4, 3], yend = Arrow_data[4, 4] ), |
| 77 | + color = 'red', size = 1, arrow = ggplot2::arrow()) + |
| 78 | + ggplot2::geom_segment(ggplot2::aes(x = Arrow_data[2, 1], y = Arrow_data[2, 2], |
| 79 | + xend = Arrow_data[2, 3], yend = Arrow_data[2, 4] ), |
| 80 | + color = 'green', size = 1, arrow = ggplot2::arrow()) + |
| 81 | + ggplot2::geom_segment(ggplot2::aes(x = Arrow_data[5, 1], y = Arrow_data[5, 2], |
| 82 | + xend = Arrow_data[5, 3], yend = Arrow_data[5, 4] ), |
| 83 | + color = 'green', size = 1, arrow = ggplot2::arrow()) + |
| 84 | + ggplot2::geom_segment(ggplot2::aes(x = Arrow_data[3, 1], y = Arrow_data[3, 2], |
| 85 | + xend = Arrow_data[3, 3], yend = Arrow_data[3, 4] ), |
| 86 | + color = 'blue', size = 1, arrow = ggplot2::arrow()) + |
| 87 | + ggplot2::geom_segment(ggplot2::aes(x = Arrow_data[6, 1], y = Arrow_data[6, 2], |
| 88 | + xend = Arrow_data[6, 3], yend = Arrow_data[6, 4] ), |
| 89 | + color = 'blue', size = 1, arrow = ggplot2::arrow()) + |
| 90 | + ggplot2::geom_polygon(data=M, ggplot2::aes(x=x0,y=y0), color='red',fill=NA,size=1.5) + |
| 91 | + ggplot2::geom_polygon(data=M, ggplot2::aes(x=x1,y=y1), color='green',fill=NA,size=1.5) + |
| 92 | + ggplot2::geom_polygon(data=M, ggplot2::aes(x=x2,y=y2), color='blue',fill=NA,size=1.5) + |
| 93 | + ggplot2::xlab(trait_name1)+ggplot2::ylab(trait_name2) + ggplot2::ggtitle(title) |
| 94 | +} |
0 commit comments