From ac72634b481eb7be9c5ab9ce5a85ee45cddf3b7e Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Thu, 25 Aug 2022 10:19:25 +0100 Subject: [PATCH] added dir for embb for consistency and checks and moved others to version1 --- .../embb/basic_barplots_embb.R | 364 ++++++++++++++++++ .../embb/basic_barplots_embb_layout.R | 310 +++++++++++++++ .../embb/dm_om_plots_layout_embb.R | 176 +++++++++ .../plotting_thesis/embb/embb_other_plots.R | 2 + .../plotting_thesis/embb/gg_pairs_all_embb.R | 203 ++++++++++ .../embb/pe_sens_site_count_embb.R | 173 +++++++++ .../embb/prominent_effects_embb.R | 320 +++++++++++++++ .../embb/sensitivity_count_embb.R | 65 ++++ .../gid/pe_sens_site_count_gid.R | 2 +- .../plotting_thesis/linage_bp_dist_layout.R | 1 - .../{ => version1}/basic_barplots.R | 0 .../{ => version1}/basic_barplots_UPDATED.R | 0 .../{ => version1}/basic_barplots_layout_v2.R | 0 .../{ => version1}/corr_plots_thesis.R | 0 .../{ => version1}/dm_om_plots_layout.R | 0 .../plotting_thesis/{ => version1}/gg_pairs.R | 0 .../{ => version1}/gg_pairs_all.R | 0 .../{ => version1}/pe_sens_site_count.R | 0 .../{ => version1}/preformatting.R | 0 19 files changed, 1614 insertions(+), 2 deletions(-) create mode 100644 scripts/plotting/plotting_thesis/embb/basic_barplots_embb.R create mode 100644 scripts/plotting/plotting_thesis/embb/basic_barplots_embb_layout.R create mode 100644 scripts/plotting/plotting_thesis/embb/dm_om_plots_layout_embb.R create mode 100644 scripts/plotting/plotting_thesis/embb/embb_other_plots.R create mode 100644 scripts/plotting/plotting_thesis/embb/gg_pairs_all_embb.R create mode 100644 scripts/plotting/plotting_thesis/embb/pe_sens_site_count_embb.R create mode 100644 scripts/plotting/plotting_thesis/embb/prominent_effects_embb.R create mode 100644 scripts/plotting/plotting_thesis/embb/sensitivity_count_embb.R rename scripts/plotting/plotting_thesis/{ => version1}/basic_barplots.R (100%) rename scripts/plotting/plotting_thesis/{ => version1}/basic_barplots_UPDATED.R (100%) rename scripts/plotting/plotting_thesis/{ => version1}/basic_barplots_layout_v2.R (100%) rename scripts/plotting/plotting_thesis/{ => version1}/corr_plots_thesis.R (100%) rename scripts/plotting/plotting_thesis/{ => version1}/dm_om_plots_layout.R (100%) rename scripts/plotting/plotting_thesis/{ => version1}/gg_pairs.R (100%) rename scripts/plotting/plotting_thesis/{ => version1}/gg_pairs_all.R (100%) rename scripts/plotting/plotting_thesis/{ => version1}/pe_sens_site_count.R (100%) rename scripts/plotting/plotting_thesis/{ => version1}/preformatting.R (100%) diff --git a/scripts/plotting/plotting_thesis/embb/basic_barplots_embb.R b/scripts/plotting/plotting_thesis/embb/basic_barplots_embb.R new file mode 100644 index 0000000..4ae9f50 --- /dev/null +++ b/scripts/plotting/plotting_thesis/embb/basic_barplots_embb.R @@ -0,0 +1,364 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: Barplots +# basic barplots with outcome +# basic barplots with frequency of count of mutations +######################################################### +#============= +# Data: Input +#============== +#source("~/git/LSHTM_analysis/config/embb.R") +#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + +#cat("\nSourced plotting cols as well:", length(plotting_cols)) + +#################################################### +class(merged_df3) + +df3 = subset(merged_df3, select = -c(pos_count)) + +#======= +# output +#======= +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +cat("plots will output to:", outdir_images) + +########################################################## +# blue, red bp +sts = 8 +lts = 8 +ats = 8 +als = 8 +ltis = 8 +geom_ls = 2.2 + +#pos_count +subtitle_size = 8 +geom_ls_pc = 2.2 +leg_text_size = 8 +axis_text_size = 8 +axis_label_size = 8 + +########################################################### +#------------------------------ +# plot default sizes +#------------------------------ +#========================= +# Affinity outcome +# check this var: outcome_cols_affinity +# get from preformatting or put in globals +#========================== +DistCutOff +LigDist_colname # = "ligand_distance" # from globals +ppi2Dist_colname +naDist_colname + +########################################################### +# get plotting data within the distance +df3_lig = df3[df3[[LigDist_colname]]DistCutOff,"mCSM-lig"]=0 +corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mmCSM-lig"]=0 + +if (tolower(gene)%in%geneL_ppi2){ + corr_affinity_df[corr_affinity_df["PPI-Dist"]>DistCutOff,"mCSM-PPI2"]=0 +} + +if (tolower(gene)%in%geneL_na){ + corr_affinity_df[corr_affinity_df["NA-Dist"]>DistCutOff,"mCSM-NA"]=0 +} + +# count 0 +#res <- colSums(corr_affinity_df==0)/nrow(corr_affinity_df)*100 +unmasked_vals <- nrow(corr_affinity_df) - colSums(corr_affinity_df==0) +unmasked_vals + +########################################################## +#================ +# Stability +#================ +corr_ps_colnames = c(static_cols + , "DUET" + , "FoldX" + , "DeepDDG" + , "Dynamut2" + , aff_dist_cols + , "dst_mode") + +corr_df_ps = corr_plotdf[, corr_ps_colnames] + +# Plot #1 +plot_corr_df_ps = my_gg_pairs(corr_df_ps, plot_title="Stability estimates") + +########################################################## +#================ +# Conservation +#================ +corr_conservation_cols = c( static_cols + , "ConSurf" + , "SNAP2" + , "PROVEAN" + #, aff_dist_cols + , "dst_mode" +) + +corr_df_cons = corr_plotdf[, corr_conservation_cols] + +# Plot #2 +plot_corr_df_cons = my_gg_pairs(corr_df_cons, plot_title="Conservation estimates") + +########################################################## +#================ +# Affinity: lig, ppi and na as applicable +#================ +#corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]10 +head(str_df) + +# replace in place affinity values >10 +str_df[str_df["ligand_distance"]>10,"affinity_scaled"]=0 +str_df[str_df["ligand_distance"]>10,"mmcsm_lig_scaled"]=0 + +#ppi2 gene: replace in place ppi2 affinity values where ppi2 dist >10 +if (tolower(gene)%in%geneL_ppi2){ + str_df[str_df["interface_dist"]>10,"mcsm_ppi2_scaled"]=0 +} + +# na gene: replace in place na affinity values where na dist >10 +if (tolower(gene)%in%geneL_na){ + str_df[str_df["nca_distance"]>10,"mcsm_na_scaled"]=0 +} + +colnames(str_df) +head(str_df) + +scaled_cols_tc = other_cols[grep("scaled", other_cols)] + + +################################################ +#=============== +# whole df +#=============== +give_col=function(x,y,df=str_df){ + df[df[[pos_colname]]==x,y] +} + +for (i in unique(str_df[[pos_colname]]) ){ + print(i) + #cat(length(unique(str_df[[pos_colname]]))) + + biggest = max(abs(give_col(i,scaled_cols_tc))) + + str_df[str_df[[pos_colname]]==i,'abs_max_effect'] = biggest + str_df[str_df[[pos_colname]]==i,'effect_type']= names( + give_col(i,scaled_cols_tc)[which( + abs( + give_col(i,scaled_cols_tc) + ) == biggest, arr.ind=T + )[, "col"]])[1] + + effect_name = unique(str_df[str_df[[pos_colname]]==i,'effect_type'])#[1] # pick first one in case we have multiple exact values + + # get index/rowname for value of max effect, and then use it to get the original sign + # here + #ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T)) + ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c(pos_colname,effect_name)][effect_name])== biggest, arr.ind=T)) + + str_df[str_df[[pos_colname]]==i,'effect_sign'] = sign(str_df[effect_name][ind,])[1] +} + +# ends with suffix 2 if dups +str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # cull duplicate effect types that happen when there are exact duplicate values +colnames(str_df) +table(str_df$effect_type) + +# check +str_df_check = str_df[str_df[[pos_colname]]%in%c(24, 32, 160, 303, 334),] + +#================ +# for Plots +#================ +str_df_short = str_df[, c("mutationinformation", + #"position", + pos_colname, + "sensitivity" + , "effect_type" + , "effect_sign")] + +table(str_df_short$effect_type) +table(str_df_short$effect_sign) +str(str_df_short) + +# assign pe outcome +str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS") +table(str_df_short$pe_outcome ) +table(str_df_short$effect_sign) + +#============== +# group effect type: +# lig, ppi2, nuc. acid, stability +#============== +affcols = c("affinity_scaled", "mmcsm_lig_scaled") +ppi2_cols = c("mcsm_ppi2_scaled") + +#lig +table(str_df_short$effect_type) +str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols + , "lig" + , str_df_short$effect_type) +table(str_df_short$effect_grouped) + +#ppi2 +str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%ppi2_cols + , "ppi2" + , str_df_short$effect_grouped) +table(str_df_short$effect_grouped) + + +#stability +str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("lig", + "ppi2" + ) + , "stability" + , str_df_short$effect_grouped) + +table(str_df_short$effect_grouped) + +# create a sign as well +str_df_short$pe_effect_outcome = paste0(str_df_short$pe_outcome, "_" + , str_df_short$effect_grouped) + +table(str_df_short$pe_effect_outcome) + +##################################################################### +# Chimera: for colouring +#################################################################### + +#------------------------------------- +# get df with unique position +#-------------------------------------- +#data[!duplicated(data$x), ] +str_df_plot = str_df_short[!duplicated(str_df[[pos_colname]]),] + +if (nrow(str_df_plot) == length(unique(str_df[[pos_colname]]))){ + cat("\nPASS: successfully extracted df with unique positions") +}else{ + stop("\nAbort: Could not extract df with unique positions") +} + +#------------------------------------- +# generate colours for effect types +#-------------------------------------- +str_df_plot_cols = str_df_plot[, c(pos_colname, + "sensitivity", + "pe_outcome", + "effect_grouped", + "pe_effect_outcome")] +head(str_df_plot_cols) + +# colour intensity based on sign +#str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$effect_sign<0, "bright", "light") +str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$pe_outcome=="DD", "bright", "light") + +table(str_df_plot_cols$colour_hue); table(str_df_plot$pe_outcome) +head(str_df_plot_cols) + +# colour based on effect +table(str_df_plot_cols$pe_effect_outcome) + +pe_colour_map = c("DD_lig" = "#f0e68c" # khaki + , "SS_lig" = "#ffd700" # gold + + , "DD_nucleic_acid"= "#d2b48c" # sandybrown + , "SS_nucleic_acid"= "#a0522d" # sienna + + , "DD_ppi2" = "#da70d6" # orchid + , "SS_ppi2" = "#ff1493" # deeppink + + , "DD_stability" = "#f8766d" # red + , "SS_stability" = "#00BFC4") # blue + +#unlist(d[c('a', 'a', 'c', 'b')], use.names=FALSE) + +#map the colours +str_df_plot_cols$colour_map= unlist(map(str_df_plot_cols$pe_effect_outcome + ,function(x){pe_colour_map[[x]]} + )) +head(str_df_plot_cols$colour_map) +table(str_df_plot_cols$colour_map) +table(str_df_plot_cols$pe_effect_outcome) + +# str_df_plot_cols$colours = paste0(str_df_plot_cols$colour_hue +# , "_" +# , str_df_plot_cols$colour_map) +# head(str_df_plot_cols$colours) +# table(str_df_plot_cols$colours) +# +# +# class(str_df_plot_cols$colour_map) +# str(str_df_plot_cols) + +# sort by colour +head(str_df_plot_cols) +str_df_plot_cols = str_df_plot_cols[order(str_df_plot_cols$colour_map), ] +head(str_df_plot_cols) + +#====================================== +# write file with prominent effects +#====================================== +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +write.csv(str_df_plot_cols, paste0(outdir_images, tolower(gene), "_prominent_effects.csv")) + +################################ +# printing for chimera +############################### +chain_suffix = ".A" +str_df_plot_cols$pos_chain = paste0(str_df_plot_cols[[pos_colname]], chain_suffix) +table(str_df_plot_cols$colour_map) +table(str_df_plot_cols$pe_effect_outcome) + +#=================================================== +#------------------- +# Ligand Affinity +#------------------- +# -ve Lig Aff +dd_lig = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_lig",] +if (nrow(dd_lig) == table(str_df_plot_cols$pe_effect_outcome)[['DD_lig']]){ + dd_lig_pos = dd_lig[[pos_colname]] +}else{ + stop("Abort: DD affinity colour numbers mismtatch") +} +toString(paste0(dd_lig_pos, chain_suffix)) + + +# +ve Lig Aff +ss_lig = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_lig",] +if (!empty(ss_lig)){ + if (nrow(ss_lig) == table(str_df_plot_cols$pe_effect_outcome)[['SS_lig']]){ + ss_lig_pos = ss_lig[[pos_colname]] + }else{ + stop("Abort: SS affinity colour numbers mismtatch") + } + #put in chimera cmd + toString(paste0(ss_lig_pos, chain_suffix)) +} + + +#=================================================== +#------------------- +# PPI2 Affinity +#------------------- +# -ve PPI2 +dd_ppi2 = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_ppi2",] +if (nrow(dd_ppi2) == table(str_df_plot_cols$pe_effect_outcome)[['DD_ppi2']]){ + dd_ppi2_pos = dd_ppi2[[pos_colname]] +}else{ + stop("Abort: DD PPI2 colour numbers mismtatch") +} + +# +ve PPI2 +ss_ppi2 = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_ppi2",] +if (nrow(ss_ppi2) == table(str_df_plot_cols$pe_effect_outcome)[['SS_ppi2']]){ + ss_ppi2_pos = ss_ppi2[[pos_colname]] +}else{ + stop("Abort: SS PPI2 colour numbers mismtatch") +} + +#put in chimera cmd +toString(paste0(dd_ppi2_pos,chain_suffix)) +toString(paste0(ss_ppi2_pos,chain_suffix)) + +#========================================================= +#------------------------ +# Stability +#------------------------ +# -ve Stability +dd_stability = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_stability",] +if (nrow(dd_stability) == table(str_df_plot_cols$pe_effect_outcome)[['DD_stability']]){ + dd_stability_pos = dd_stability[[pos_colname]] +}else{ + stop("Abort: DD Stability colour numbers mismtatch") +} + +# +ve Stability +ss_stability = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_stability",] +if (nrow(ss_stability) == table(str_df_plot_cols$pe_effect_outcome)[['SS_stability']]){ + ss_stability_pos = ss_stability[[pos_colname]] +}else{ + stop("Abort: SS Stability colour numbers mismtatch") +} + +#put in chimera cmd +toString(paste0(dd_stability_pos, chain_suffix)) +toString(paste0(ss_stability_pos, chain_suffix)) +#################################################################### + diff --git a/scripts/plotting/plotting_thesis/embb/sensitivity_count_embb.R b/scripts/plotting/plotting_thesis/embb/sensitivity_count_embb.R new file mode 100644 index 0000000..66caef2 --- /dev/null +++ b/scripts/plotting/plotting_thesis/embb/sensitivity_count_embb.R @@ -0,0 +1,65 @@ +#========================= +# Count Sensitivity +# Mutations and positions +#========================= +pos_colname_c ="position" + +sensP_df = merged_df3[,c("mutationinformation", + #"position", + pos_colname_c, + "sensitivity")] + +head(sensP_df) +table(sensP_df$sensitivity) + +#--------------- +# Total unique positions +#---------------- +tot_mut_pos = length(unique(sensP_df[[pos_colname_c]])) +cat("\nNo of Tot muts sites:", tot_mut_pos) + +# resistant mut pos +sens_site_allR = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="R"] +sens_site_UR = unique(sens_site_allR) +length(sens_site_UR) + +# Sensitive mut pos +sens_site_allS = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="S"] +sens_site_US = unique(sens_site_allS) +length(sens_site_UR) + +#--------------- +# Common Sites +#---------------- +common_pos = intersect(sens_site_UR,sens_site_US) +site_Cc = length(common_pos) +cat("\nNo of Common sites:", site_Cc + , "\nThese are:", common_pos) + +#--------------- +# Resistant muts +#---------------- +site_R = sens_site_UR[!sens_site_UR%in%common_pos] +site_Rc = length(site_R) + +if ( length(sens_site_allR) == table(sensP_df$sensitivity)[['R']] ){ + cat("\nNo of R muts:", length(sens_site_allR) + , "\nNo. of R sites:",site_Rc + , "\nThese are:", site_R +) +} + +#--------------- +# Sensitive muts +#---------------- +site_S = sens_site_US[!sens_site_US%in%common_pos] +site_Sc = length(site_S) + +if ( length(sens_site_allS) == table(sensP_df$sensitivity)[['S']] ){ + cat("\nNo of S muts:", length(sens_site_allS) + , "\nNo. of S sites:", site_Sc + , "\nThese are:", site_S) +} + +######################### + diff --git a/scripts/plotting/plotting_thesis/gid/pe_sens_site_count_gid.R b/scripts/plotting/plotting_thesis/gid/pe_sens_site_count_gid.R index 35d3cb7..28e97f7 100644 --- a/scripts/plotting/plotting_thesis/gid/pe_sens_site_count_gid.R +++ b/scripts/plotting/plotting_thesis/gid/pe_sens_site_count_gid.R @@ -3,7 +3,7 @@ source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/gid/sensi ############################################################## # PE count -#pe_colour_map = c("DD_lig" = "#f0e68c" # khaki +#pe_colour_map = c("DD_lig" = "#f0e68c" # khaki # , "SS_lig" = "#ffd700" # gold # , "DD_nucleic_acid"= "#d2b48c" # sandybrown diff --git a/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R b/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R index 071e0ce..f19822e 100644 --- a/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R +++ b/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R @@ -31,7 +31,6 @@ source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/linage_bp # png my_label_size = 12 - linPlots_combined = paste0(outdir_images , tolower(gene) ,"_linP_combined.png") diff --git a/scripts/plotting/plotting_thesis/basic_barplots.R b/scripts/plotting/plotting_thesis/version1/basic_barplots.R similarity index 100% rename from scripts/plotting/plotting_thesis/basic_barplots.R rename to scripts/plotting/plotting_thesis/version1/basic_barplots.R diff --git a/scripts/plotting/plotting_thesis/basic_barplots_UPDATED.R b/scripts/plotting/plotting_thesis/version1/basic_barplots_UPDATED.R similarity index 100% rename from scripts/plotting/plotting_thesis/basic_barplots_UPDATED.R rename to scripts/plotting/plotting_thesis/version1/basic_barplots_UPDATED.R diff --git a/scripts/plotting/plotting_thesis/basic_barplots_layout_v2.R b/scripts/plotting/plotting_thesis/version1/basic_barplots_layout_v2.R similarity index 100% rename from scripts/plotting/plotting_thesis/basic_barplots_layout_v2.R rename to scripts/plotting/plotting_thesis/version1/basic_barplots_layout_v2.R diff --git a/scripts/plotting/plotting_thesis/corr_plots_thesis.R b/scripts/plotting/plotting_thesis/version1/corr_plots_thesis.R similarity index 100% rename from scripts/plotting/plotting_thesis/corr_plots_thesis.R rename to scripts/plotting/plotting_thesis/version1/corr_plots_thesis.R diff --git a/scripts/plotting/plotting_thesis/dm_om_plots_layout.R b/scripts/plotting/plotting_thesis/version1/dm_om_plots_layout.R similarity index 100% rename from scripts/plotting/plotting_thesis/dm_om_plots_layout.R rename to scripts/plotting/plotting_thesis/version1/dm_om_plots_layout.R diff --git a/scripts/plotting/plotting_thesis/gg_pairs.R b/scripts/plotting/plotting_thesis/version1/gg_pairs.R similarity index 100% rename from scripts/plotting/plotting_thesis/gg_pairs.R rename to scripts/plotting/plotting_thesis/version1/gg_pairs.R diff --git a/scripts/plotting/plotting_thesis/gg_pairs_all.R b/scripts/plotting/plotting_thesis/version1/gg_pairs_all.R similarity index 100% rename from scripts/plotting/plotting_thesis/gg_pairs_all.R rename to scripts/plotting/plotting_thesis/version1/gg_pairs_all.R diff --git a/scripts/plotting/plotting_thesis/pe_sens_site_count.R b/scripts/plotting/plotting_thesis/version1/pe_sens_site_count.R similarity index 100% rename from scripts/plotting/plotting_thesis/pe_sens_site_count.R rename to scripts/plotting/plotting_thesis/version1/pe_sens_site_count.R diff --git a/scripts/plotting/plotting_thesis/preformatting.R b/scripts/plotting/plotting_thesis/version1/preformatting.R similarity index 100% rename from scripts/plotting/plotting_thesis/preformatting.R rename to scripts/plotting/plotting_thesis/version1/preformatting.R