diff --git a/scripts/plotting/plotting_thesis/alr/DONOTUSEdm_om_plots_layout_alr.R b/scripts/plotting/plotting_thesis/alr/DONOTUSEdm_om_plots_layout_alr.R new file mode 100644 index 0000000..cab13ae --- /dev/null +++ b/scripts/plotting/plotting_thesis/alr/DONOTUSEdm_om_plots_layout_alr.R @@ -0,0 +1,177 @@ +# # source dm_om_plots.R +# source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/dm_om_plots.R") +# +# ##### plots to combine #### +# duetP +# foldxP +# deepddgP +# dynamut2P +# genomicsP +# consurfP +# proveanP +# snap2P +# mcsmligP +# mcsmlig2P +# mcsmppi2P +# +# +# # Plot labels +# tit1 = "Stability changes" +# tit2 = "Genomic measure" +# tit3 = "Distance to partners" +# tit4 = "Evolutionary Conservation" +# tit5 = "Affinity changes" +# pt_size = 30 +# +# theme_georgia <- function(...) { +# theme_gray(base_family = "sans", ...) + +# theme(plot.title = element_text(face = "bold")) +# } +# +# +# title_theme <- calc_element("plot.title", theme_georgia()) +# +# pt1 = ggdraw() + +# draw_label( +# tit1, +# fontfamily = title_theme$family, +# fontface = title_theme$face, +# #size = title_theme$size +# size = pt_size +# ) +# +# pt2 = ggdraw() + +# draw_label( +# tit2, +# fontfamily = title_theme$family, +# fontface = title_theme$face, +# size = pt_size +# ) +# +# pt3 = ggdraw() + +# draw_label( +# tit3, +# fontfamily = title_theme$family, +# fontface = title_theme$face, +# size = pt_size +# ) +# +# pt4 = ggdraw() + +# draw_label( +# tit4, +# fontfamily = title_theme$family, +# fontface = title_theme$face, +# size = pt_size +# ) +# +# +# pt5 = ggdraw() + +# draw_label( +# tit5, +# fontfamily = title_theme$family, +# fontface = title_theme$face, +# size = pt_size +# ) +# +# #====================== +# # Output plot function +# #====================== +# OutPlot_dm_om = function(x){ +# +# # dist b/w plot title and plot +# relH_tp = c(0.08, 0.92) +# +# my_label_size = 25 +# #---------------- +# # Top panel +# #---------------- +# top_panel = cowplot::plot_grid( +# cowplot::plot_grid(pt1, +# cowplot::plot_grid(duetP, foldxP, deepddgP, dynamut2P +# , nrow = 1 +# , labels = c("A", "B", "C", "D") +# , label_size = my_label_size) +# , ncol = 1 +# , rel_heights = relH_tp +# ), +# NULL, +# cowplot::plot_grid(pt2, +# cowplot::plot_grid(genomicsP +# , nrow = 1 +# , labels = c("E") +# , label_size = my_label_size) +# , ncol = 1 +# , rel_heights = relH_tp +# ), +# NULL, +# cowplot::plot_grid(pt3, +# cowplot::plot_grid( #distanceP +# distanceP_lig +# , distanceP_ppi2 +# , nrow = 1 +# , labels = c("F", "G") +# , label_size = my_label_size) +# , ncol = 1 +# , rel_heights = relH_tp +# ), +# nrow = 1, +# rel_widths = c(2/7, 0.1/7, 0.5/7, 0.1/7, 1/7) +# ) +# +# #---------------- +# # Bottom panel +# #---------------- +# bottom_panel = cowplot::plot_grid( +# cowplot::plot_grid(pt4, +# cowplot::plot_grid(consurfP, proveanP, snap2P +# , nrow = 1 +# , labels = c("H", "I", "J") +# , label_size = my_label_size) +# , ncol = 1 +# , rel_heights =relH_tp +# ),NULL, +# cowplot::plot_grid(pt5, +# cowplot::plot_grid(mcsmligP +# , mcsmlig2P +# , mcsmppi2P +# , nrow = 1 +# , labels = c("K", "L", "M") +# , label_size = my_label_size) +# , ncol = 1 +# , rel_heights = relH_tp +# ),NULL, +# nrow = 1, +# rel_widths = c(3/6,0.1/6,3/6, 0.1/6 ) +# ) +# +# #------------------------------- +# # combine: Top and Bottom panel +# #------------------------------- +# cowplot::plot_grid (top_panel, bottom_panel +# , nrow =2 +# , rel_widths = c(1, 1) +# , align = "hv") +# } +# +# #===================== +# # OutPlot: svg and png +# #====================== +# dm_om_combinedP = paste0(outdir_images +# ,tolower(gene) +# ,"_dm_om_all.svg") +# +# cat("DM OM plots with stats:", dm_om_combinedP) +# svg(dm_om_combinedP, width = 32, height = 18) +# +# OutPlot_dm_om() +# dev.off() +# +# +# dm_om_combinedP_png = paste0(outdir_images +# ,tolower(gene) +# ,"_dm_om_all.png") +# cat("DM OM plots with stats:", dm_om_combinedP_png) +# png(dm_om_combinedP_png, width = 32, height = 18, units = "in", res = 300) +# +# OutPlot_dm_om() +# dev.off() diff --git a/scripts/plotting/plotting_thesis/alr/alr_ORandSNP_results.R b/scripts/plotting/plotting_thesis/alr/alr_ORandSNP_results.R new file mode 100644 index 0000000..5567926 --- /dev/null +++ b/scripts/plotting/plotting_thesis/alr/alr_ORandSNP_results.R @@ -0,0 +1,198 @@ +#!/usr/bin/env Rscript +#source("~/git/LSHTM_analysis/config/gid.R") +#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + +#======= +# output +#======= +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +################################################################### +# FIXME: ADD distance to NA when SP replies + +geneL_normal = c("pnca") +geneL_na = c("gid", "rpob") +geneL_ppi2 = c("alr", "embb", "katg", "rpob") + +# LigDist_colname # from globals used +# ppi2Dist_colname #from globals used +# naDist_colname #from globals used + +common_cols = c("mutationinformation" + , "X5uhc_position" + , "X5uhc_offset" + , "position" + , "dst_mode" + , "mutation_info_labels" + , "sensitivity", dist_columns ) + + +######################################## +categ_cols_to_factor = grep( "_outcome|_info", colnames(merged_df3) ) +fact_cols = colnames(merged_df3)[categ_cols_to_factor] + +if (any(lapply(merged_df3[, fact_cols], class) == "character")){ + cat("\nChanging", length(categ_cols_to_factor), "cols to factor") + merged_df3[, fact_cols] <- lapply(merged_df3[, fact_cols], as.factor) + if (all(lapply(merged_df3[, fact_cols], class) == "factor")){ + cat("\nSuccessful: cols changed to factor") + } +}else{ + cat("\nRequested cols aready factors") +} + +cat("\ncols changed to factor are:\n", colnames(merged_df3)[categ_cols_to_factor] ) + +#################################### +# merged_df3: NECESSARY pre-processing +################################### +#df3 = merged_df3 +plot_cols = c("mutationinformation", "mutation_info_labels", "position", "dst_mode" + , all_cols) + +all_cols = c(common_cols + , all_stability_cols + , all_affinity_cols + , all_conserv_cols) + + +# counting +foo = merged_df3[, c("mutationinformation" + , "wild_pos" + , "position" + , "sensitivity" + , "avg_lig_affinity" + , "avg_lig_affinity_scaled" + , "avg_lig_affinity_outcome" + , "ligand_distance" + , "ligand_affinity_change" + , "affinity_scaled" + , "ligand_outcome" + , "consurf_colour_rev" + , "consurf_outcome")] + +table(foo$consurf_outcome) +foo2 = foo[foo$ligand_distance<10,] + +table(foo2$ligand_outcome) + +############################# +# wide plots SNP +# DRUG +length(aa_pos_drug); aa_pos_drug +drug = foo[foo$position%in%aa_pos_drug,] +drug$wild_pos +#CA + + +############################################### +# OR plot + +bar = merged_df3[, c("mutationinformation" + , "wild_pos" + , "position" + , "sensitivity" + , affinity_dist_colnames + , "or_mychisq" + , "pval_fisher" + #, "pval_chisq" + , "neglog_pval_fisher" + , "log10_or_mychisq")] + +# bar$p_adj_bonferroni = p.adjust(bar$pval_fisher, method = "bonferroni") +# bar$signif_bon = bar$p_adj_bonferroni +# bar = dplyr::mutate(bar +# , signif_bon = case_when(signif_bon == 0.05 ~ "." +# , signif_bon <=0.0001 ~ '****' +# , signif_bon <=0.001 ~ '***' +# , signif_bon <=0.01 ~ '**' +# , signif_bon <0.05 ~ '*' +# , TRUE ~ 'ns')) + +bar$p_adj_fdr = p.adjust(bar$pval_fisher, method = "BH") +bar$signif_fdr = bar$p_adj_fdr +bar = dplyr::mutate(bar + , signif_fdr = case_when(signif_fdr == 0.05 ~ "." + , signif_fdr <=0.0001 ~ '****' + , signif_fdr <=0.001 ~ '***' + , signif_fdr <=0.01 ~ '**' + , signif_fdr <0.05 ~ '*' + , TRUE ~ 'ns')) + +# sort df +bar = bar[order(bar$or_mychisq, decreasing = T), ] +bar = bar[, c("mutationinformation" + , "wild_pos" + , "position" + , "sensitivity" + , affinity_dist_colnames + , "or_mychisq" + #, "pval_fisher" + #, "pval_chisq" + #, "neglog_pval_fisher" + #, "log10_or_mychisq" + #, "signif_bon" + , "p_adj_fdr" + , "signif_fdr")] + +table(bar$sensitivity) + +str(bar) +sen = bar[bar$or_mychisq<1,] +sen = na.omit(sen) + +res = bar[bar$or_mychisq>1,] +res = na.omit(res) + +# comp +bar_or = bar[!is.na(bar$or_mychisq),] +table(bar_or$sensitivity) + +sen1 = bar_or[bar_or$or_mychisq<1,] # sen and res ~OR +res1 = bar_or[bar_or$or_mychisq>1,] # sen and res ~OR + +# sanity check +if (nrow(bar_or) == nrow(sen1) + nrow(res1) ){ + cat("\nPASS: df with or successfully sourced" + , "\nCalculating % of muts with OR>1") +}else{ + stop("Abort: df with or numbers mimatch") +} + +# percent for OR muts +pc_orR = nrow(res1)/(nrow(sen1) + nrow(res1)); pc_orR + +cat("Number of R muts with OR>1:", nrow(res1) + , "\nPercentage of muts with OR>1 i.e resistant:" + , pc_orR *100 ) + +# muts with highest OR +head(bar_or$mutationinformation, 10) + +# sort df +bar_or = bar_or[order(bar_or$or_mychisq + , bar_or$ligand_distance + #, bar_or$nca_dist + , bar_or$interface_dist + , decreasing = T), ] + +nrow(bar_or) +bar_or$drug_site = ifelse(bar_or$position%in%aa_pos_drug, "drug", "no") +table(bar_or$drug_site) + +bar_or$plp_site = ifelse(bar_or$position%in%aa_pos_plp, "plp", "no") +table(bar_or$plp_site) +top10_or = bar_or[1:10,] + +# are these active sites +top10_or$position[top10_or$position%in%active_aa_pos] + + +# clostest most sig +bar_or_lig = bar_or[bar_or$ligand_distance<10,] +bar_or_lig = bar_or_lig[order(bar_or_lig$ligand_distance, -bar_or_lig$or_mychisq), ] +table(bar_or_lig$signif_fdr) + + +bar_or_ppi = bar_or[bar_or$interface_dist<10,] +bar_or_ppi = bar_or_ppi[order(bar_or_ppi$interface_dist, -bar_or_ppi$or_mychisq), ] +table(bar_or_ppi$signif_fdr) diff --git a/scripts/plotting/plotting_thesis/alr/alr_appendix_tables.R b/scripts/plotting/plotting_thesis/alr/alr_appendix_tables.R new file mode 100644 index 0000000..948ab78 --- /dev/null +++ b/scripts/plotting/plotting_thesis/alr/alr_appendix_tables.R @@ -0,0 +1,459 @@ +#!/usr/bin/env Rscript +#source("~/git/LSHTM_analysis/config/alr.R") +#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + +#======= +# output +#======= +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +outdir_stats = paste0(outdir_images,"stats/") +cat("\nOutput dir for stats:", outdir_stats) +################################################################### +geneL_normal = c("pnca") +#geneL_na = c("gid", "rpob") +geneL_na_v2 = c("gid") +geneL_ppi2 = c("alr", "embb", "katg", "rpob") +geneL_both = c("rpob") + + +if (tolower(gene)%in%geneL_na_v2) { + gene_colnames = c("mcsm_na_affinity", "mcsm_na_outcome") +} + +if (tolower(gene)%in%geneL_ppi2) { + gene_colnames = c("mcsm_ppi2_affinity", "mcsm_ppi2_outcome") +} + +#from plotting_globals() +LigDist_colname +ppi2Dist_colname +naDist_colname + +delta_symbol #delta_symbol = "\u0394"; delta_symbol +angstroms_symbol + +cat("\nAffinity Distance colnames:", length(affinity_dist_colnames) + , "\nThese are:", affinity_dist_colnames) +#=========== +# Data used +#=========== +df3 = merged_df3 + +cols_to_output = c("position" + , "sensitivity" + , "mutationinformation" + , affinity_dist_colnames[1] + , "ligand_affinity_change" + , "ligand_outcome" + , "mmcsm_lig" + , "mmcsm_lig_outcome" + , affinity_dist_colnames[2] + # #, affinity_dist_colnames[3] + # , "mcsm_na_affinity" + # , "mcsm_na_outcome" + # #, "mcsm_nca_affinity" + # #, "mcsm_nca_outcome" + , gene_colnames + , "maf" + , "or_mychisq" + , "pval_fisher") + +cols_to_output +df3_output = df3[, cols_to_output] +colnames(df3_output) +cat("\nSelecting columns:", length(colnames(df3_output))) +#=============================================== +# Add COLS and rounding: adjusted P-values + MAF +#============================================== +#----------------------------- +# adjusted P-values +#----------------------------- +# add cols: p_adj_fdr and signif_fdr +df3_output$p_adj_fdr = p.adjust(df3_output$pval_fisher, method = "fdr") +df3_output$signif_fdr = df3_output$p_adj_fdr +df3_output = dplyr::mutate(df3_output + , signif_fdr = case_when(signif_fdr == 0.05 ~ "." + , signif_fdr <=0.0001 ~ '****' + , signif_fdr <=0.001 ~ '***' + , signif_fdr <=0.01 ~ '**' + , signif_fdr <0.05 ~ '*' + , TRUE ~ 'ns')) +# rounding +df3_output$or_mychisq = round(df3_output$or_mychisq,2) +df3_output$p_adj_fdr = round(df3_output$p_adj_fdr,2) +head(df3_output) + +#---------- +# MAF (%) +#---------- +# add col maf_percent +df3_output$maf_percent = df3_output$maf*100 + +# rounding +df3_output$maf_percent = round(df3_output$maf_percent,2) +head(df3_output$af); head(df3_output$maf);head(df3_output$maf_percent) + +#---------- +# P-value +#---------- +df3_output$pval_fisher = round(df3_output$pval_fisher,2) + +class(df3_output) +head(df3_output) + +#################################### +# Appendix: ligand affinity +#################################### +df_lig = df3_output[df3_output[[LigDist_colname]]