added plotting/corr_plots_style2.Radded my version of pairs.panel with lower panel turned off. Also added new script for corr plots using my version of pairs.panel

This commit is contained in:
Tanushree Tunstall 2020-09-17 15:31:37 +01:00
parent e8b58bfe28
commit a6182f2b3d
2 changed files with 562 additions and 0 deletions

View file

@ -0,0 +1,304 @@
#!/usr/bin/env Rscript
#########################################################
# TASK: Corr plots for PS and Lig
# Output: 1 svg
#=======================================================================
# working dir and loading libraries
getwd()
setwd("~/git/LSHTM_analysis/scripts/plotting/")
getwd()
source("Header_TT.R")
require(cowplot)
source("combining_dfs_plotting.R")
source("my_pairs_panel.R") # with lower panel turned off
# should return the following dfs, directories and variables
# PS combined:
# 1) merged_df2
# 2) merged_df2_comp
# 3) merged_df3
# 4) merged_df3_comp
# LIG combined:
# 5) merged_df2_lig
# 6) merged_df2_comp_lig
# 7) merged_df3_lig
# 8) merged_df3_comp_lig
# 9) my_df_u
# 10) my_df_u_lig
cat(paste0("Directories imported:"
, "\ndatadir:", datadir
, "\nindir:", indir
, "\noutdir:", outdir
, "\nplotdir:", plotdir))
cat(paste0("Variables imported:"
, "\ndrug:", drug
, "\ngene:", gene
, "\ngene_match:", gene_match
, "\nAngstrom symbol:", angstroms_symbol
, "\nNo. of duplicated muts:", dup_muts_nu
, "\nNA count for ORs:", na_count
, "\nNA count in df2:", na_count_df2
, "\nNA count in df3:", na_count_df3))
#=======
# output
#=======
# can't combine by cowplot because not ggplots
#corr_plot_combined = "corr_combined.svg"
#plot_corr_plot_combined = paste0(plotdir,"/", corr_plot_combined)
# PS
corr_ps_s2 = "corr_PS_style2.svg"
plot_corr_ps_s2 = paste0(plotdir,"/", corr_ps_s2)
# LIG
corr_lig_s2 = "corr_LIG_style2.svg"
plot_corr_lig_s2 = paste0(plotdir,"/", corr_lig_s2)
####################################################################
# end of loading libraries and functions #
########################################################################
#%%%%%%%%%%%%%%%%%%%%%%%%%
df_ps = merged_df3_comp
df_lig = merged_df3_comp_lig
#%%%%%%%%%%%%%%%%%%%%%%%%%
rm( merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig, my_df_u, my_df_u_lig)
########################################################################
# end of data extraction and cleaning for plots #
########################################################################
#============================
# adding foldx scaled values
# scale data b/w -1 and 1
#============================
n = which(colnames(df_ps) == "ddg"); n
my_min = min(df_ps[,n]); my_min
my_max = max(df_ps[,n]); my_max
df_ps$foldx_scaled = ifelse(df_ps[,n] < 0
, df_ps[,n]/abs(my_min)
, df_ps[,n]/my_max)
# sanity check
my_min = min(df_ps$foldx_scaled); my_min
my_max = max(df_ps$foldx_scaled); my_max
if (my_min == -1 && my_max == 1){
cat("PASS: foldx ddg successfully scaled b/w -1 and 1"
, "\nProceeding with assigning foldx outcome category")
}else{
cat("FAIL: could not scale foldx ddg values"
, "Aborting!")
}
#================================
# adding foldx outcome category
# ddg<0 = "Stabilising" (-ve)
#=================================
c1 = table(df_ps$ddg < 0)
df_ps$foldx_outcome = ifelse(df_ps$ddg < 0, "Stabilising", "Destabilising")
c2 = table(df_ps$ddg < 0)
if ( all(c1 == c2) ){
cat("PASS: foldx outcome successfully created")
}else{
cat("FAIL: foldx outcome could not be created. Aborting!")
exit()
}
table(df_ps$foldx_outcome)
#======================
# adding log cols
#======================
df_ps$log10_or_mychisq = log10(df_ps$or_mychisq)
df_ps$neglog_pval_fisher = -log10(df_ps$pval_fisher)
df_ps$log10_or_kin = log10(df_ps$or_kin)
df_ps$neglog_pwald_kin = -log10(df_ps$pwald_kin)
#===========================
# Data for Correlation plots:PS
#===========================
# subset data to generate pairwise correlations
cols_to_select = c("duet_scaled"
, "foldx_scaled"
, "log10_or_mychisq"
, "neglog_pval_fisher"
, "af"
, "duet_outcome"
, drug)
corr_data_ps = df_ps[cols_to_select]
dim(corr_data_ps)
#p_italic = substitute(paste("-Log(", italic('P'), ")"));p_italic
#p_adjusted_italic = substitute(paste("-Log(", italic('P adjusted'), ")"));p_adjusted_italic
# assign nice colnames (for display)
my_corr_colnames = c("DUET"
, "Foldx"
, "Log(OR)"
, "-Log(P)"
, "AF"
, "duet_outcome"
, drug)
length(my_corr_colnames)
colnames(corr_data_ps)
colnames(corr_data_ps) <- my_corr_colnames
colnames(corr_data_ps)
#-----------------
# generate corr PS plot
#-----------------
start = 1
end = which(colnames(corr_data_ps) == drug); end # should be the last column
offset = 1
my_corr_ps = corr_data_ps[start:(end-offset)]
head(my_corr_ps)
#my_cols = c("#f8766d", "#00bfc4")
# deep blue :#007d85
# deep red: #ae301e
cat("Corr plot PS:", plot_corr_ps_s2)
svg(plot_corr_ps_s2, width = 15, height = 15)
#OutPlot1 = pairs.panels([1:(length(my_corr_ps)-1)]
OutPlot1 = my_pp(my_corr_ps[1:(length(my_corr_ps)-1)]
, method = "spearman" # correlation method
, hist.col = "grey" ##00AFBB
, density = TRUE # show density plots
, ellipses = F # show correlation ellipses
, stars = T
, rug = F
, breaks = "Sturges"
, show.points = T
, bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_ps$duet_outcome))]
, pch = 21
, jitter = T
#, alpha = .05
#, points(pch = 19, col = c("#f8766d", "#00bfc4"))
, cex = 3
, cex.axis = 1.5
, cex.labels = 2.1
, cex.cor = 1
, smooth = F
)
print(OutPlot1)
dev.off()
#===========================
# Data for Correlation plots: LIG
#===========================
table(df_lig$ligand_outcome)
df_lig$log10_or_mychisq = log10(df_lig$or_mychisq)
df_lig$neglog_pval_fisher = -log10(df_lig$pval_fisher)
df_lig$log10_or_kin = log10(df_lig$or_kin)
df_lig$neglog_pwald_kin = -log10(df_lig$pwald_kin)
# subset data to generate pairwise correlations
cols_to_select = c("affinity_scaled"
, "log10_or_mychisq"
, "neglog_pval_fisher"
, "af"
, "ligand_outcome"
, drug)
corr_data_lig = df_lig[, cols_to_select]
dim(corr_data_lig)
# assign nice colnames (for display)
my_corr_colnames = c("Ligand Affinity"
, "Log(OR)"
, "-Log(P)"
, "AF"
, "ligand_outcome"
, drug)
length(my_corr_colnames)
colnames(corr_data_lig)
colnames(corr_data_lig) <- my_corr_colnames
colnames(corr_data_lig)
#-----------------
# generate corr LIG plot
#-----------------
start = 1
end = which(colnames(corr_data_lig) == drug); end # should be the last column
offset = 1
my_corr_lig = corr_data_lig[start:(end-offset)]
head(my_corr_lig)
cat("Corr LIG plot:", plot_corr_lig_s2)
svg(plot_corr_lig_s2, width = 15, height = 15)
#OutPlot2 = pairs.panels(my_corr_lig[1:(length(my_corr_lig)-1)]
OutPlot2 = my_pp(my_corr_lig[1:(length(my_corr_lig)-1)]
, method = "spearman" # correlation method
, hist.col = "grey" ##00AFBB
, density = TRUE # show density plots
, ellipses = F # show correlation ellipses
, stars = T
, rug = F
, breaks = "Sturges"
, show.points = T
, bg = c("#f8766d", "#00bfc4")[unclass(factor(my_corr_lig$ligand_outcome))]
, pch = 21
, jitter = T
#, alpha = .05
#, points(pch = 19, col = c("#f8766d", "#00bfc4"))
, cex = 3
, cex.axis = 1.5
, cex.labels = 2.1
, cex.cor = 1
, smooth = F
)
print(OutPlot2)
dev.off()
#######################################################
library(lattice)

View file

@ -0,0 +1,258 @@
my_pp = function (x, smooth = TRUE, scale = FALSE, density = TRUE, ellipses = TRUE,
digits = 2, method = "pearson", pch = 20, lm = FALSE, cor = TRUE,
jiggle = FALSE, factor = 2, hist.col = "cyan", show.points = TRUE,
rug = TRUE, breaks = "Sturges", cex.cor = 1, wt = NULL, smoother = FALSE,
stars = FALSE, ci = FALSE, alpha = 0.05, ...)
{
"panel.hist.density" <- function(x, ...) {
usr <- par("usr")
on.exit(par(usr))
par(usr = c(usr[1], usr[2], 0, 1.5))
tax <- table(x)
if (length(tax) < 11) {
breaks <- as.numeric(names(tax))
y <- tax/max(tax)
interbreak <- min(diff(breaks)) * (length(tax) -
1)/41
rect(breaks - interbreak, 0, breaks + interbreak,
y, col = hist.col)
}
else {
h <- hist(x, breaks = breaks, plot = FALSE)
breaks <- h$breaks
nB <- length(breaks)
y <- h$counts
y <- y/max(y)
rect(breaks[-nB], 0, breaks[-1], y, col = hist.col)
}
if (density) {
tryd <- try(d <- density(x, na.rm = TRUE, bw = "nrd",
adjust = 1.2), silent = TRUE)
if (!inherits(tryd, "try-error")) {
d$y <- d$y/max(d$y)
lines(d)
}
}
if (rug)
rug(x)
}
"panel.cor" <- function(x, y, prefix = "", ...) {
usr <- par("usr")
on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
if (is.null(wt)) {
r <- cor(x, y, use = "pairwise", method = method)
}
else {
r <- cor.wt(data.frame(x, y), w = wt[, c(1:2)])$r[1,
2]
}
txt <- format(c(round(r, digits), 0.123456789), digits = digits)[1]
txt <- paste(prefix, txt, sep = "")
if (stars) {
pval <- r.test(sum(!is.na(x * y)), r)$p
symp <- symnum(pval, corr = FALSE, cutpoints = c(0,
0.001, 0.01, 0.05, 1), symbols = c("***", "**",
"*", " "), legend = FALSE)
txt <- paste0(txt, symp)
}
cex <- cex.cor * 0.8/(max(strwidth("0.12***"), strwidth(txt)))
if (scale) {
cex1 <- cex * abs(r)
if (cex1 < 0.25)
cex1 <- 0.25
text(0.5, 0.5, txt, cex = cex1)
}
else {
text(0.5, 0.5, txt, cex = cex)
}
}
"panel.smoother" <- function(x, y, pch = par("pch"), col.smooth = "red",
span = 2/3, iter = 3, ...) {
xm <- mean(x, na.rm = TRUE)
ym <- mean(y, na.rm = TRUE)
xs <- sd(x, na.rm = TRUE)
ys <- sd(y, na.rm = TRUE)
r = cor(x, y, use = "pairwise", method = method)
if (jiggle) {
x <- jitter(x, factor = factor)
y <- jitter(y, factor = factor)
}
if (smoother) {
smoothScatter(x, y, add = TRUE, nrpoints = 0)
}
else {
if (show.points)
points(x, y, pch = pch, ...)
}
ok <- is.finite(x) & is.finite(y)
if (any(ok)) {
if (smooth & ci) {
lml <- loess(y ~ x, degree = 1, family = "symmetric")
tempx <- data.frame(x = seq(min(x, na.rm = TRUE),
max(x, na.rm = TRUE), length.out = 47))
pred <- predict(lml, newdata = tempx, se = TRUE)
if (ci) {
upperci <- pred$fit + confid * pred$se.fit
lowerci <- pred$fit - confid * pred$se.fit
polygon(c(tempx$x, rev(tempx$x)), c(lowerci,
rev(upperci)), col = adjustcolor("light grey",
alpha.f = 0.8), border = NA)
}
lines(tempx$x, pred$fit, col = col.smooth, ...)
}
else {
if (smooth)
lines(stats::lowess(x[ok], y[ok], f = span,
iter = iter), col = col.smooth)
}
}
if (ellipses)
draw.ellipse(xm, ym, xs, ys, r, col.smooth = col.smooth,
...)
}
"panel.lm" <- function(x, y, pch = par("pch"), col.lm = "red",
...) {
ymin <- min(y)
ymax <- max(y)
xmin <- min(x)
xmax <- max(x)
ylim <- c(min(ymin, xmin), max(ymax, xmax))
xlim <- ylim
if (jiggle) {
x <- jitter(x, factor = factor)
y <- jitter(y, factor = factor)
}
if (smoother) {
smoothScatter(x, y, add = TRUE, nrpoints = 0)
}
else {
if (show.points) {
points(x, y, pch = pch, ylim = ylim, xlim = xlim,
...)
}
}
ok <- is.finite(x) & is.finite(y)
if (any(ok)) {
lml <- lm(y ~ x)
if (ci) {
tempx <- data.frame(x = seq(min(x, na.rm = TRUE),
max(x, na.rm = TRUE), length.out = 47))
pred <- predict.lm(lml, newdata = tempx, se.fit = TRUE)
upperci <- pred$fit + confid * pred$se.fit
lowerci <- pred$fit - confid * pred$se.fit
polygon(c(tempx$x, rev(tempx$x)), c(lowerci,
rev(upperci)), col = adjustcolor("light grey",
alpha.f = 0.8), border = NA)
}
if (ellipses) {
xm <- mean(x, na.rm = TRUE)
ym <- mean(y, na.rm = TRUE)
xs <- sd(x, na.rm = TRUE)
ys <- sd(y, na.rm = TRUE)
r = cor(x, y, use = "pairwise", method = method)
draw.ellipse(xm, ym, xs, ys, r, col.smooth = col.lm,
...)
}
abline(lml, col = col.lm, ...)
}
}
"draw.ellipse" <- function(x = 0, y = 0, xs = 1, ys = 1,
r = 0, col.smooth, add = TRUE, segments = 51, ...) {
angles <- (0:segments) * 2 * pi/segments
unit.circle <- cbind(cos(angles), sin(angles))
if (!is.na(r)) {
if (abs(r) > 0)
theta <- sign(r)/sqrt(2)
else theta = 1/sqrt(2)
shape <- diag(c(sqrt(1 + r), sqrt(1 - r))) %*% matrix(c(theta,
theta, -theta, theta), ncol = 2, byrow = TRUE)
ellipse <- unit.circle %*% shape
ellipse[, 1] <- ellipse[, 1] * xs + x
ellipse[, 2] <- ellipse[, 2] * ys + y
if (show.points)
points(x, y, pch = 19, col = col.smooth, cex = 1.5)
lines(ellipse, ...)
}
}
"panel.ellipse" <- function(x, y, pch = par("pch"), col.smooth = "red",
...) {
segments = 51
usr <- par("usr")
on.exit(par(usr))
par(usr = c(usr[1] - abs(0.05 * usr[1]), usr[2] + abs(0.05 *
usr[2]), 0, 1.5))
xm <- mean(x, na.rm = TRUE)
ym <- mean(y, na.rm = TRUE)
xs <- sd(x, na.rm = TRUE)
ys <- sd(y, na.rm = TRUE)
r = cor(x, y, use = "pairwise", method = method)
if (jiggle) {
x <- jitter(x, factor = factor)
y <- jitter(y, factor = factor)
}
if (smoother) {
smoothScatter(x, y, add = TRUE, nrpoints = 0)
}
else {
if (show.points) {
points(x, y, pch = pch, ...)
}
}
angles <- (0:segments) * 2 * pi/segments
unit.circle <- cbind(cos(angles), sin(angles))
if (!is.na(r)) {
if (abs(r) > 0)
theta <- sign(r)/sqrt(2)
else theta = 1/sqrt(2)
shape <- diag(c(sqrt(1 + r), sqrt(1 - r))) %*% matrix(c(theta,
theta, -theta, theta), ncol = 2, byrow = TRUE)
ellipse <- unit.circle %*% shape
ellipse[, 1] <- ellipse[, 1] * xs + xm
ellipse[, 2] <- ellipse[, 2] * ys + ym
points(xm, ym, pch = 19, col = col.smooth, cex = 1.5)
if (ellipses)
lines(ellipse, ...)
}
}
old.par <- par(no.readonly = TRUE)
on.exit(par(old.par))
if (missing(cex.cor))
cex.cor <- 1
for (i in 1:ncol(x)) {
if (is.character(x[[i]])) {
x[[i]] <- as.numeric(as.factor(x[[i]]))
colnames(x)[i] <- paste(colnames(x)[i], "*", sep = "")
}
}
n.obs <- nrow(x)
confid <- qt(1 - alpha/2, n.obs - 2)
if (!lm) {
if (cor) {
pairs(x, diag.panel = panel.hist.density, upper.panel = panel.cor,
#lower.panel = panel.smoother
, lower.panel = NULL
, pch = pch, ...)
}
else {
pairs(x, diag.panel = panel.hist.density, upper.panel = panel.smoother,
#lower.panel = panel.smoother
, lower.panel = NULL
, pch = pch, ...)
}
}
else {
if (!cor) {
pairs(x, diag.panel = panel.hist.density, upper.panel = panel.lm,
#lower.panel = panel.lm
, lower.panel = NULL
, pch = pch, ...)
}
else {
pairs(x, diag.panel = panel.hist.density, upper.panel = panel.cor,
#lower.panel = panel.lm
, lower.panel = NULL
, pch = pch, ...)
}
}
}