From a6182f2b3d9bc6f35ab52376ec0f16ff78e3099b Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Thu, 17 Sep 2020 15:31:37 +0100 Subject: [PATCH] added plotting/corr_plots_style2.Radded my version of pairs.panel with lower panel turned off. Also added new script for corr plots using my version of pairs.panel --- scripts/plotting/corr_plots_style2.R | 304 +++++++++++++++++++++++++++ scripts/plotting/my_pairs_panel.R | 258 +++++++++++++++++++++++ 2 files changed, 562 insertions(+) create mode 100644 scripts/plotting/corr_plots_style2.R create mode 100644 scripts/plotting/my_pairs_panel.R diff --git a/scripts/plotting/corr_plots_style2.R b/scripts/plotting/corr_plots_style2.R new file mode 100644 index 0000000..74047ab --- /dev/null +++ b/scripts/plotting/corr_plots_style2.R @@ -0,0 +1,304 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: Corr plots for PS and Lig + +# Output: 1 svg + +#======================================================================= +# working dir and loading libraries +getwd() +setwd("~/git/LSHTM_analysis/scripts/plotting/") +getwd() + + +source("Header_TT.R") +require(cowplot) +source("combining_dfs_plotting.R") +source("my_pairs_panel.R") # with lower panel turned off +# should return the following dfs, directories and variables + +# PS combined: +# 1) merged_df2 +# 2) merged_df2_comp +# 3) merged_df3 +# 4) merged_df3_comp + +# LIG combined: +# 5) merged_df2_lig +# 6) merged_df2_comp_lig +# 7) merged_df3_lig +# 8) merged_df3_comp_lig + +# 9) my_df_u +# 10) my_df_u_lig + +cat(paste0("Directories imported:" + , "\ndatadir:", datadir + , "\nindir:", indir + , "\noutdir:", outdir + , "\nplotdir:", plotdir)) + +cat(paste0("Variables imported:" + , "\ndrug:", drug + , "\ngene:", gene + , "\ngene_match:", gene_match + , "\nAngstrom symbol:", angstroms_symbol + , "\nNo. of duplicated muts:", dup_muts_nu + , "\nNA count for ORs:", na_count + , "\nNA count in df2:", na_count_df2 + , "\nNA count in df3:", na_count_df3)) + +#======= +# output +#======= +# can't combine by cowplot because not ggplots +#corr_plot_combined = "corr_combined.svg" +#plot_corr_plot_combined = paste0(plotdir,"/", corr_plot_combined) + +# PS +corr_ps_s2 = "corr_PS_style2.svg" +plot_corr_ps_s2 = paste0(plotdir,"/", corr_ps_s2) + +# LIG +corr_lig_s2 = "corr_LIG_style2.svg" +plot_corr_lig_s2 = paste0(plotdir,"/", corr_lig_s2) + +#################################################################### +# end of loading libraries and functions # +######################################################################## + +#%%%%%%%%%%%%%%%%%%%%%%%%% +df_ps = merged_df3_comp +df_lig = merged_df3_comp_lig +#%%%%%%%%%%%%%%%%%%%%%%%%% + +rm( merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig, my_df_u, my_df_u_lig) + +######################################################################## +# end of data extraction and cleaning for plots # +######################################################################## + +#============================ +# adding foldx scaled values +# scale data b/w -1 and 1 +#============================ +n = which(colnames(df_ps) == "ddg"); n + +my_min = min(df_ps[,n]); my_min +my_max = max(df_ps[,n]); my_max + +df_ps$foldx_scaled = ifelse(df_ps[,n] < 0 + , df_ps[,n]/abs(my_min) + , df_ps[,n]/my_max) +# sanity check +my_min = min(df_ps$foldx_scaled); my_min +my_max = max(df_ps$foldx_scaled); my_max + +if (my_min == -1 && my_max == 1){ + cat("PASS: foldx ddg successfully scaled b/w -1 and 1" + , "\nProceeding with assigning foldx outcome category") +}else{ + cat("FAIL: could not scale foldx ddg values" + , "Aborting!") +} + + +#================================ +# adding foldx outcome category +# ddg<0 = "Stabilising" (-ve) +#================================= + +c1 = table(df_ps$ddg < 0) +df_ps$foldx_outcome = ifelse(df_ps$ddg < 0, "Stabilising", "Destabilising") +c2 = table(df_ps$ddg < 0) + +if ( all(c1 == c2) ){ + cat("PASS: foldx outcome successfully created") +}else{ + cat("FAIL: foldx outcome could not be created. Aborting!") + exit() +} + +table(df_ps$foldx_outcome) + +#====================== +# adding log cols +#====================== + +df_ps$log10_or_mychisq = log10(df_ps$or_mychisq) +df_ps$neglog_pval_fisher = -log10(df_ps$pval_fisher) + +df_ps$log10_or_kin = log10(df_ps$or_kin) +df_ps$neglog_pwald_kin = -log10(df_ps$pwald_kin) + +#=========================== +# Data for Correlation plots:PS +#=========================== +# subset data to generate pairwise correlations +cols_to_select = c("duet_scaled" + + , "foldx_scaled" + + , "log10_or_mychisq" + , "neglog_pval_fisher" + + , "af" + + , "duet_outcome" + , drug) + +corr_data_ps = df_ps[cols_to_select] + +dim(corr_data_ps) + +#p_italic = substitute(paste("-Log(", italic('P'), ")"));p_italic +#p_adjusted_italic = substitute(paste("-Log(", italic('P adjusted'), ")"));p_adjusted_italic + +# assign nice colnames (for display) +my_corr_colnames = c("DUET" + + , "Foldx" + + , "Log(OR)" + , "-Log(P)" + + , "AF" + + , "duet_outcome" + , drug) + +length(my_corr_colnames) + +colnames(corr_data_ps) +colnames(corr_data_ps) <- my_corr_colnames +colnames(corr_data_ps) + +#----------------- +# generate corr PS plot +#----------------- +start = 1 +end = which(colnames(corr_data_ps) == drug); end # should be the last column +offset = 1 + +my_corr_ps = corr_data_ps[start:(end-offset)] +head(my_corr_ps) + +#my_cols = c("#f8766d", "#00bfc4") +# deep blue :#007d85 +# deep red: #ae301e + +cat("Corr plot PS:", plot_corr_ps_s2) +svg(plot_corr_ps_s2, width = 15, height = 15) + +#OutPlot1 = pairs.panels([1:(length(my_corr_ps)-1)] +OutPlot1 = my_pp(my_corr_ps[1:(length(my_corr_ps)-1)] + , method = "spearman" # correlation method + , hist.col = "grey" ##00AFBB + , density = TRUE # show density plots + , ellipses = F # show correlation ellipses + , stars = T + , rug = F + , breaks = "Sturges" + , show.points = T + , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_ps$duet_outcome))] + , pch = 21 + , jitter = T + #, alpha = .05 + #, points(pch = 19, col = c("#f8766d", "#00bfc4")) + , cex = 3 + , cex.axis = 1.5 + , cex.labels = 2.1 + , cex.cor = 1 + , smooth = F + +) + +print(OutPlot1) +dev.off() + +#=========================== +# Data for Correlation plots: LIG +#=========================== +table(df_lig$ligand_outcome) + +df_lig$log10_or_mychisq = log10(df_lig$or_mychisq) +df_lig$neglog_pval_fisher = -log10(df_lig$pval_fisher) + + +df_lig$log10_or_kin = log10(df_lig$or_kin) +df_lig$neglog_pwald_kin = -log10(df_lig$pwald_kin) + + +# subset data to generate pairwise correlations +cols_to_select = c("affinity_scaled" + + , "log10_or_mychisq" + , "neglog_pval_fisher" + + , "af" + + , "ligand_outcome" + , drug) + +corr_data_lig = df_lig[, cols_to_select] + + +dim(corr_data_lig) + +# assign nice colnames (for display) +my_corr_colnames = c("Ligand Affinity" + + , "Log(OR)" + , "-Log(P)" + + , "AF" + + , "ligand_outcome" + , drug) + +length(my_corr_colnames) + +colnames(corr_data_lig) +colnames(corr_data_lig) <- my_corr_colnames +colnames(corr_data_lig) + +#----------------- +# generate corr LIG plot +#----------------- + +start = 1 +end = which(colnames(corr_data_lig) == drug); end # should be the last column +offset = 1 + +my_corr_lig = corr_data_lig[start:(end-offset)] +head(my_corr_lig) + +cat("Corr LIG plot:", plot_corr_lig_s2) +svg(plot_corr_lig_s2, width = 15, height = 15) + +#OutPlot2 = pairs.panels(my_corr_lig[1:(length(my_corr_lig)-1)] +OutPlot2 = my_pp(my_corr_lig[1:(length(my_corr_lig)-1)] + , method = "spearman" # correlation method + , hist.col = "grey" ##00AFBB + , density = TRUE # show density plots + , ellipses = F # show correlation ellipses + , stars = T + , rug = F + , breaks = "Sturges" + , show.points = T + , bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_lig$ligand_outcome))] + , pch = 21 + , jitter = T + #, alpha = .05 + #, points(pch = 19, col = c("#f8766d", "#00bfc4")) + , cex = 3 + , cex.axis = 1.5 + , cex.labels = 2.1 + , cex.cor = 1 + , smooth = F +) + +print(OutPlot2) +dev.off() +####################################################### +library(lattice) diff --git a/scripts/plotting/my_pairs_panel.R b/scripts/plotting/my_pairs_panel.R new file mode 100644 index 0000000..8380f1f --- /dev/null +++ b/scripts/plotting/my_pairs_panel.R @@ -0,0 +1,258 @@ +my_pp = function (x, smooth = TRUE, scale = FALSE, density = TRUE, ellipses = TRUE, + digits = 2, method = "pearson", pch = 20, lm = FALSE, cor = TRUE, + jiggle = FALSE, factor = 2, hist.col = "cyan", show.points = TRUE, + rug = TRUE, breaks = "Sturges", cex.cor = 1, wt = NULL, smoother = FALSE, + stars = FALSE, ci = FALSE, alpha = 0.05, ...) +{ + "panel.hist.density" <- function(x, ...) { + usr <- par("usr") + on.exit(par(usr)) + par(usr = c(usr[1], usr[2], 0, 1.5)) + tax <- table(x) + if (length(tax) < 11) { + breaks <- as.numeric(names(tax)) + y <- tax/max(tax) + interbreak <- min(diff(breaks)) * (length(tax) - + 1)/41 + rect(breaks - interbreak, 0, breaks + interbreak, + y, col = hist.col) + } + else { + h <- hist(x, breaks = breaks, plot = FALSE) + breaks <- h$breaks + nB <- length(breaks) + y <- h$counts + y <- y/max(y) + rect(breaks[-nB], 0, breaks[-1], y, col = hist.col) + } + if (density) { + tryd <- try(d <- density(x, na.rm = TRUE, bw = "nrd", + adjust = 1.2), silent = TRUE) + if (!inherits(tryd, "try-error")) { + d$y <- d$y/max(d$y) + lines(d) + } + } + if (rug) + rug(x) + } + "panel.cor" <- function(x, y, prefix = "", ...) { + usr <- par("usr") + on.exit(par(usr)) + par(usr = c(0, 1, 0, 1)) + if (is.null(wt)) { + r <- cor(x, y, use = "pairwise", method = method) + } + else { + r <- cor.wt(data.frame(x, y), w = wt[, c(1:2)])$r[1, + 2] + } + txt <- format(c(round(r, digits), 0.123456789), digits = digits)[1] + txt <- paste(prefix, txt, sep = "") + if (stars) { + pval <- r.test(sum(!is.na(x * y)), r)$p + symp <- symnum(pval, corr = FALSE, cutpoints = c(0, + 0.001, 0.01, 0.05, 1), symbols = c("***", "**", + "*", " "), legend = FALSE) + txt <- paste0(txt, symp) + } + cex <- cex.cor * 0.8/(max(strwidth("0.12***"), strwidth(txt))) + if (scale) { + cex1 <- cex * abs(r) + if (cex1 < 0.25) + cex1 <- 0.25 + text(0.5, 0.5, txt, cex = cex1) + } + else { + text(0.5, 0.5, txt, cex = cex) + } + } + "panel.smoother" <- function(x, y, pch = par("pch"), col.smooth = "red", + span = 2/3, iter = 3, ...) { + xm <- mean(x, na.rm = TRUE) + ym <- mean(y, na.rm = TRUE) + xs <- sd(x, na.rm = TRUE) + ys <- sd(y, na.rm = TRUE) + r = cor(x, y, use = "pairwise", method = method) + if (jiggle) { + x <- jitter(x, factor = factor) + y <- jitter(y, factor = factor) + } + if (smoother) { + smoothScatter(x, y, add = TRUE, nrpoints = 0) + } + else { + if (show.points) + points(x, y, pch = pch, ...) + } + ok <- is.finite(x) & is.finite(y) + if (any(ok)) { + if (smooth & ci) { + lml <- loess(y ~ x, degree = 1, family = "symmetric") + tempx <- data.frame(x = seq(min(x, na.rm = TRUE), + max(x, na.rm = TRUE), length.out = 47)) + pred <- predict(lml, newdata = tempx, se = TRUE) + if (ci) { + upperci <- pred$fit + confid * pred$se.fit + lowerci <- pred$fit - confid * pred$se.fit + polygon(c(tempx$x, rev(tempx$x)), c(lowerci, + rev(upperci)), col = adjustcolor("light grey", + alpha.f = 0.8), border = NA) + } + lines(tempx$x, pred$fit, col = col.smooth, ...) + } + else { + if (smooth) + lines(stats::lowess(x[ok], y[ok], f = span, + iter = iter), col = col.smooth) + } + } + if (ellipses) + draw.ellipse(xm, ym, xs, ys, r, col.smooth = col.smooth, + ...) + } + "panel.lm" <- function(x, y, pch = par("pch"), col.lm = "red", + ...) { + ymin <- min(y) + ymax <- max(y) + xmin <- min(x) + xmax <- max(x) + ylim <- c(min(ymin, xmin), max(ymax, xmax)) + xlim <- ylim + if (jiggle) { + x <- jitter(x, factor = factor) + y <- jitter(y, factor = factor) + } + if (smoother) { + smoothScatter(x, y, add = TRUE, nrpoints = 0) + } + else { + if (show.points) { + points(x, y, pch = pch, ylim = ylim, xlim = xlim, + ...) + } + } + ok <- is.finite(x) & is.finite(y) + if (any(ok)) { + lml <- lm(y ~ x) + if (ci) { + tempx <- data.frame(x = seq(min(x, na.rm = TRUE), + max(x, na.rm = TRUE), length.out = 47)) + pred <- predict.lm(lml, newdata = tempx, se.fit = TRUE) + upperci <- pred$fit + confid * pred$se.fit + lowerci <- pred$fit - confid * pred$se.fit + polygon(c(tempx$x, rev(tempx$x)), c(lowerci, + rev(upperci)), col = adjustcolor("light grey", + alpha.f = 0.8), border = NA) + } + if (ellipses) { + xm <- mean(x, na.rm = TRUE) + ym <- mean(y, na.rm = TRUE) + xs <- sd(x, na.rm = TRUE) + ys <- sd(y, na.rm = TRUE) + r = cor(x, y, use = "pairwise", method = method) + draw.ellipse(xm, ym, xs, ys, r, col.smooth = col.lm, + ...) + } + abline(lml, col = col.lm, ...) + } + } + "draw.ellipse" <- function(x = 0, y = 0, xs = 1, ys = 1, + r = 0, col.smooth, add = TRUE, segments = 51, ...) { + angles <- (0:segments) * 2 * pi/segments + unit.circle <- cbind(cos(angles), sin(angles)) + if (!is.na(r)) { + if (abs(r) > 0) + theta <- sign(r)/sqrt(2) + else theta = 1/sqrt(2) + shape <- diag(c(sqrt(1 + r), sqrt(1 - r))) %*% matrix(c(theta, + theta, -theta, theta), ncol = 2, byrow = TRUE) + ellipse <- unit.circle %*% shape + ellipse[, 1] <- ellipse[, 1] * xs + x + ellipse[, 2] <- ellipse[, 2] * ys + y + if (show.points) + points(x, y, pch = 19, col = col.smooth, cex = 1.5) + lines(ellipse, ...) + } + } + "panel.ellipse" <- function(x, y, pch = par("pch"), col.smooth = "red", + ...) { + segments = 51 + usr <- par("usr") + on.exit(par(usr)) + par(usr = c(usr[1] - abs(0.05 * usr[1]), usr[2] + abs(0.05 * + usr[2]), 0, 1.5)) + xm <- mean(x, na.rm = TRUE) + ym <- mean(y, na.rm = TRUE) + xs <- sd(x, na.rm = TRUE) + ys <- sd(y, na.rm = TRUE) + r = cor(x, y, use = "pairwise", method = method) + if (jiggle) { + x <- jitter(x, factor = factor) + y <- jitter(y, factor = factor) + } + if (smoother) { + smoothScatter(x, y, add = TRUE, nrpoints = 0) + } + else { + if (show.points) { + points(x, y, pch = pch, ...) + } + } + angles <- (0:segments) * 2 * pi/segments + unit.circle <- cbind(cos(angles), sin(angles)) + if (!is.na(r)) { + if (abs(r) > 0) + theta <- sign(r)/sqrt(2) + else theta = 1/sqrt(2) + shape <- diag(c(sqrt(1 + r), sqrt(1 - r))) %*% matrix(c(theta, + theta, -theta, theta), ncol = 2, byrow = TRUE) + ellipse <- unit.circle %*% shape + ellipse[, 1] <- ellipse[, 1] * xs + xm + ellipse[, 2] <- ellipse[, 2] * ys + ym + points(xm, ym, pch = 19, col = col.smooth, cex = 1.5) + if (ellipses) + lines(ellipse, ...) + } + } + old.par <- par(no.readonly = TRUE) + on.exit(par(old.par)) + if (missing(cex.cor)) + cex.cor <- 1 + for (i in 1:ncol(x)) { + if (is.character(x[[i]])) { + x[[i]] <- as.numeric(as.factor(x[[i]])) + colnames(x)[i] <- paste(colnames(x)[i], "*", sep = "") + } + } + n.obs <- nrow(x) + confid <- qt(1 - alpha/2, n.obs - 2) + if (!lm) { + if (cor) { + pairs(x, diag.panel = panel.hist.density, upper.panel = panel.cor, + #lower.panel = panel.smoother + , lower.panel = NULL + , pch = pch, ...) + } + else { + pairs(x, diag.panel = panel.hist.density, upper.panel = panel.smoother, + #lower.panel = panel.smoother + , lower.panel = NULL + , pch = pch, ...) + } + } + else { + if (!cor) { + pairs(x, diag.panel = panel.hist.density, upper.panel = panel.lm, + #lower.panel = panel.lm + , lower.panel = NULL + , pch = pch, ...) + } + else { + pairs(x, diag.panel = panel.hist.density, upper.panel = panel.cor, + #lower.panel = panel.lm + , lower.panel = NULL + , pch = pch, ...) + } + } +} \ No newline at end of file