diff --git a/scripts/plotting/AFFINITY_TEST.R b/scripts/plotting/AFFINITY_TEST.R new file mode 100644 index 0000000..48b1e8a --- /dev/null +++ b/scripts/plotting/AFFINITY_TEST.R @@ -0,0 +1,120 @@ +# find the most "impactful" effect value +biggest=max(abs((a[c('affinity_scaled','mmcsm_lig_scaled','mcsm_ppi2_scaled','mcsm_na_scaled')]))) + +abs((a[c('affinity_scaled','mmcsm_lig_scaled','mcsm_ppi2_scaled','mcsm_na_scaled')]))==biggest + +abs((a[c('affinity_scaled','mmcsm_lig_scaled','mcsm_ppi2_scaled','mcsm_na_scaled')]))==c(,biggest) + +max(abs((a[c('affinity_scaled','mmcsm_lig_scaled','mcsm_ppi2_scaled','mcsm_na_scaled')]))) + + +a2 = (a[c('affinity_scaled','mmcsm_lig_scaled','mcsm_ppi2_scaled','mcsm_na_scaled')]) +a2 + +biggest = max(abs(a2[1,])) + +#hmm +#which(abs(a2) == biggest) +#names(a2)[apply(a2, 1:4, function(i) which(i == max()))] + +# get row max +a2$row_maximum = apply(abs(a2[,-1]), 1, max) + +# get colname for abs(max_value) +#https://stackoverflow.com/questions/36960010/get-column-name-that-matches-specific-row-value-in-dataframe +#names(df)[which(df == 1, arr.ind=T)[, "col"]] +# yayy +names(a2)[which(abs(a2) == biggest, arr.ind=T)[, "col"]] + +#another:https://statisticsglobe.com/return-column-name-of-largest-value-for-each-row-in-r +colnames(a2)[max.col(abs(a2), ties.method = "first")] # Apply colnames & max.col functions +################################################# +# use whole df +#gene_aff_cols = c('affinity_scaled','mmcsm_lig_scaled','mcsm_ppi2_scaled','mcsm_na_scaled') + +biggest = max(abs(a[gene_aff_cols])) +a$max_es = biggest +a$effect = names(a[gene_aff_cols])[which(abs(a[gene_aff_cols]) == biggest, arr.ind=T)[, "col"]] + +effect_name = unique(a$effect) +#get index of value of max effect +ind = (which(abs(a[effect_name]) == biggest, arr.ind=T)) +a[effect_name][ind] +# extract sign +a$effect_sign = sign(a[effect_name][ind]) +######################################################## +# maxn <- function(n) function(x) order(x, decreasing = TRUE)[n] +# second_big = abs(a[gene_aff_cols])[maxn(2)(abs(a[gene_aff_cols])] +# apply(df, 1, function(x)x[maxn(1)(x)]) +# apply(a[gene_aff_cols], 1, function(x) abs(a[gene_aff_cols])[maxn(2)(abs(a[gene_aff_cols]))]) +######################################################### +# loop +a2 = df2[df2$position%in%c(167, 423, 427),] +test <- a2 %>% + dplyr::group_by(position) %>% + biggest = max(abs(a2[gene_aff_cols])) + a2$max_es = max(abs(a2[gene_aff_cols])) + a2$effect = names(a2[gene_aff_cols])[which(abs(a2[gene_aff_cols]) == biggest, arr.ind=T)[, "col"]] + effect_name = unique(a2$effect) + + #get index of value of max effect + ind = (which(abs(a2[effect_name]) == biggest, arr.ind=T)) + a2[effect_name][ind] + # extract sign + a2$effect_dir = sign(a2[effect_name][ind]) +################################# +df2_short = df2[df2$position%in%c(167, 423, 427),] + +for (i in unique(df2_short$position) ){ + #print(i) + #print(paste0("\nNo. of unique positions:", length(unique(df2$position))) ) + #cat(length(unique(df2$position))) + a2 = df2_short[df2_short$position==i,] + biggest = max(abs(a2[gene_aff_cols])) + a2$max_es = max(abs(a2[gene_aff_cols])) + a2$effect = names(a2[gene_aff_cols])[which(abs(a2[gene_aff_cols]) == biggest, arr.ind=T)[, "col"]] + effect_name = unique(a2$effect) + + #get index of value of max effect + ind = (which(abs(a2[effect_name]) == biggest, arr.ind=T)) + a2[effect_name][ind] + # extract sign + a2$effect_sign = sign(a2[effect_name][ind]) +} + +#======================== +df2_short = df2[df2$position%in%c(167, 423, 427),] +df2_short = df2[df2$position%in%c(170, 167, 493, 453, 435, 433, 480, 456, 445),] +df2_short = df2[df2$position%in%c(435, 480),] + +give_col=function(x,y,df=df2_short){ + df[df$position==x,y] +} + +for (i in unique(df2_short$position) ){ + #print(i) + #print(paste0("\nNo. of unique positions:", length(unique(df2$position))) ) + #cat(length(unique(df2$position))) + #df2_short[df2_short$position==i,gene_aff_cols] + + biggest = max(abs(give_col(i,gene_aff_cols))) + + df2_short[df2_short$position==i,'abs_max_effect'] = biggest + df2_short[df2_short$position==i,'effect_type']= names( + give_col(i,gene_aff_cols)[which( + abs( + give_col(i,gene_aff_cols) + ) == biggest, arr.ind=T + )[, "col"]]) + + effect_name = unique(df2_short[df2_short$position==i,'effect_type']) + + # get index/rowname for value of max effect, and then use it to get the original sign + # here + #df2_short[df2_short$position==i,c(effect_name)] + #which(abs(df2_short[df2_short$position==i,c('position',effect_name)][effect_name])==biggest, arr.ind=T) + ind = rownames(which(abs(df2_short[df2_short$position==i + ,c('position',effect_name)][effect_name])== biggest + , arr.ind=T)) + df2_short[df2_short$position==i,'effect_sign'] = sign(df2_short[effect_name][ind,]) +} diff --git a/scripts/plotting/mcsm_affinity_data_only.R b/scripts/plotting/mcsm_affinity_data_only.R new file mode 100644 index 0000000..597e44d --- /dev/null +++ b/scripts/plotting/mcsm_affinity_data_only.R @@ -0,0 +1,223 @@ +#source("~/git/LSHTM_analysis/config/pnca.R") +#source("~/git/LSHTM_analysis/config/alr.R") +#source("~/git/LSHTM_analysis/config/gid.R") +#source("~/git/LSHTM_analysis/config/embb.R") +#source("~/git/LSHTM_analysis/config/katg.R") +source("~/git/LSHTM_analysis/config/rpob.R") + +source("/home/tanu/git/LSHTM_analysis/my_header.R") +######################################################### +# TASK: Generate averaged affinity values +# across all affinity tools for a given structure +# as applicable... +######################################################### + +#======= +# output +#======= +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene)) + +#OutFile1 +outfile_mean_aff = paste0(outdir_images, "/", tolower(gene) + , "_mean_affinity_all.csv") +print(paste0("Output file:", outfile_mean_aff)) + +#OutFile2 +outfile_mean_aff_priorty = paste0(outdir_images, "/", tolower(gene) + , "_mean_affinity_priority.csv") +print(paste0("Output file:", outfile_mean_aff_priorty)) + +#%%=============================================================== + +#============= +# Input +#============= +df3_filename = paste0("/home/tanu/git/Data/", drug, "/output/", tolower(gene), "_merged_df3.csv") +df3 = read.csv(df3_filename) +length(df3$mutationinformation) + +# mut_info checks +table(df3$mutation_info) +table(df3$mutation_info_orig) +table(df3$mutation_info_labels_orig) + +# used in plots and analyses +table(df3$mutation_info_labels) # different, and matches dst_mode +table(df3$dst_mode) + +# create column based on dst mode with different colname +table(is.na(df3$dst)) +table(is.na(df3$dst_mode)) + +#=============== +# Create column: sensitivity mapped to dst_mode +#=============== +df3$sensitivity = ifelse(df3$dst_mode == 1, "R", "S") +table(df3$sensitivity) + +length(unique((df3$mutationinformation))) +all_colnames = as.data.frame(colnames(df3)) +common_cols = c("mutationinformation" + , "X5uhc_position" + , "X5uhc_offset" + , "position" + , "dst_mode" + , "mutation_info_labels" + , "sensitivity" + , "ligand_distance" + , "interface_dist") + +all_colnames$`colnames(df3)`[grep("scaled", all_colnames$`colnames(df3)`)] +all_colnames$`colnames(df3)`[grep("outcome", all_colnames$`colnames(df3)`)] + +#=================== +# stability cols +#=================== +raw_cols_stability = c("duet_stability_change" + , "deepddg" + , "ddg_dynamut2" + , "ddg_foldx") + +scaled_cols_stability = c("duet_scaled" + , "deepddg_scaled" + , "ddg_dynamut2_scaled" + , "foldx_scaled") + +outcome_cols_stability = c("duet_outcome" + , "deepddg_outcome" + , "ddg_dynamut2_outcome" + , "foldx_outcome") + +#=================== +# affinity cols +#=================== +raw_cols_affinity = c("ligand_affinity_change" + , "mmcsm_lig" + , "mcsm_ppi2_affinity" + , "mcsm_na_affinity") + +scaled_cols_affinity = c("affinity_scaled" + , "mmcsm_lig_scaled" + , "mcsm_ppi2_scaled" + , "mcsm_na_scaled" ) + +outcome_cols_affinity = c( "ligand_outcome" + , "mmcsm_lig_outcome" + , "mcsm_ppi2_outcome" + , "mcsm_na_outcome") + +#=================== +# conservation cols +#=================== +# raw_cols_conservation = c("consurf_score" +# , "snap2_score" +# , "provean_score") +# +# scaled_cols_conservation = c("consurf_scaled" +# , "snap2_scaled" +# , "provean_scaled") +# +# # CANNOT strictly be used, as categories are not identical with conssurf missing altogether +# outcome_cols_conservation = c("provean_outcome" +# , "snap2_outcome" +# #consurf outcome doesn't exist +# ) + +###################################################################### +cols_to_consider = colnames(df3)[colnames(df3)%in%c(common_cols + , raw_cols_affinity + , scaled_cols_affinity + , outcome_cols_affinity + , raw_cols_stability + , scaled_cols_stability + , outcome_cols_stability + )] + +cols_to_extract = cols_to_consider[cols_to_consider%in%c(common_cols + #, raw_cols_affinity + , scaled_cols_stability + , scaled_cols_affinity)] + +df3_plot = df3[, cols_to_extract] + +# FIXME: ADD distance to NA when SP replies +DistCutOff_colnames = c("ligand_distance", "interface_dist") +DistCutOff = 10 + +#df3_affinity_filtered = df3_plot[ (df3_plot$ligand_distance<10 | df3_plot$interface_dist <10),] +df3_affinity_filtered = df3_plot[ (df3_plot$ligand_distance<10 & df3_plot$interface_dist <10),] + +c0u = unique(df3_affinity_filtered$position) +length(c0u) + +foo = df3_affinity_filtered[df3_affinity_filtered$ligand_distance<10,] +bar = df3_affinity_filtered[df3_affinity_filtered$interface_dist<10,] + +wilcox.test(foo$mmcsm_lig_scaled~foo$sensitivity) +wilcox.test(foo$mmcsm_lig~foo$sensitivity) + +wilcox.test(foo$affinity_scaled~foo$sensitivity) +wilcox.test(foo$ligand_affinity_change~foo$sensitivity) + +wilcox.test(bar$mcsm_na_scaled~bar$sensitivity) +wilcox.test(bar$mcsm_na_affinity~bar$sensitivity) + +wilcox.test(bar$mcsm_ppi2_scaled~bar$sensitivity) +wilcox.test(bar$mcsm_ppi2_affinity~bar$sensitivity) + +############################################################## +df = df3_affinity_filtered +sum(is.na(df)) +df2 = na.omit(df) # Apply na.omit function + +gene_dist_cols = colnames(df2)[colnames(df2)%in%DistCutOff_colnames] +gene_aff_cols = colnames(df2)[colnames(df2)%in%scaled_cols_affinity] +gene_stab_cols = colnames(df2)[colnames(df2)%in%scaled_cols_stability] + +sel_cols = c("mutationinformation", "position", gene_stab_cols, gene_dist_cols, gene_aff_cols) +df3 = df2[, sel_cols] + +give_col=function(x,y,df=df3){ + df[df$position==x,y] +} + +for (i in unique(df3$position) ){ + #print(i) + biggest = max(abs(give_col(i,gene_aff_cols))) + + df3[df3$position==i,'abs_max_effect'] = biggest + df3[df3$position==i,'effect_type']= names( + give_col(i,gene_aff_cols)[which( + abs( + give_col(i,gene_aff_cols) + ) == biggest, arr.ind=T + )[, "col"]]) + + effect_name = unique(df3[df3$position==i,'effect_type']) + + # get index/rowname for value of max effect, and then use it to get the original sign + # here + #df3[df3$position==i,c(effect_name)] + #which(abs(df3[df3$position==i,c('position',effect_name)][effect_name])==biggest, arr.ind=T) + ind = rownames(which(abs(df3[df3$position==i + ,c('position', effect_name)][effect_name])== biggest + , arr.ind=T)) + df3[df3$position==i,'effect_sign'] = sign(df3[effect_name][ind,]) +} + + + + + + +#%%============================================================ +# output +write.csv(combined_df, outfile_mean_ens_st_aff + , row.names = F) +cat("Finished writing file:\n" + , outfile_mean_ens_st_aff + , "\nNo. of rows:", nrow(combined_df) + , "\nNo. of cols:", ncol(combined_df)) + +# end of script +#=============================================================== diff --git a/scripts/plotting/plotting_thesis/rpob/katg_other_plots.R b/scripts/plotting/plotting_thesis/alr/alr_other_plots.R similarity index 100% rename from scripts/plotting/plotting_thesis/rpob/katg_other_plots.R rename to scripts/plotting/plotting_thesis/alr/alr_other_plots.R diff --git a/scripts/plotting/plotting_thesis/alr/basic_barplots_alr.R b/scripts/plotting/plotting_thesis/alr/basic_barplots_alr.R new file mode 100644 index 0000000..50001a5 --- /dev/null +++ b/scripts/plotting/plotting_thesis/alr/basic_barplots_alr.R @@ -0,0 +1,364 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: Barplots +# basic barplots with outcome +# basic barplots with frequency of count of mutations +######################################################### +#============= +# Data: Input +#============== +#source("~/git/LSHTM_analysis/config/katg.R") +#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + +#cat("\nSourced plotting cols as well:", length(plotting_cols)) + +#################################################### +class(merged_df3) + +df3 = subset(merged_df3, select = -c(pos_count)) + +#======= +# output +#======= +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +cat("plots will output to:", outdir_images) + +########################################################## +# blue, red bp +sts = 8 +lts = 8 +ats = 8 +als = 8 +ltis = 8 +geom_ls = 2.2 + +#pos_count +subtitle_size = 8 +geom_ls_pc = 2.2 +leg_text_size = 8 +axis_text_size = 8 +axis_label_size = 8 + +########################################################### +#------------------------------ +# plot default sizes +#------------------------------ +#========================= +# Affinity outcome +# check this var: outcome_cols_affinity +# get from preformatting or put in globals +#========================== +DistCutOff +LigDist_colname # = "ligand_distance" # from globals +ppi2Dist_colname +naDist_colname + +########################################################### +# get plotting data within the distance +df3_lig = df3[df3[[LigDist_colname]]DistCutOff,"mCSM-lig"]=0 +corr_affinity_df[corr_affinity_df["Lig-Dist"]>DistCutOff,"mmCSM-lig"]=0 + +if (tolower(gene)%in%geneL_ppi2){ + corr_affinity_df[corr_affinity_df["PPI-Dist"]>DistCutOff,"mCSM-PPI2"]=0 +} + +if (tolower(gene)%in%geneL_na){ + corr_affinity_df[corr_affinity_df["NA-Dist"]>DistCutOff,"mCSM-NA"]=0 +} + +# count 0 +#res <- colSums(corr_affinity_df==0)/nrow(corr_affinity_df)*100 +unmasked_vals <- nrow(corr_affinity_df) - colSums(corr_affinity_df==0) +unmasked_vals + +########################################################## +#================ +# Stability +#================ +corr_ps_colnames = c(static_cols + , "DUET" + , "FoldX" + , "DeepDDG" + , "Dynamut2" + , aff_dist_cols + , "dst_mode") + +corr_df_ps = corr_plotdf[, corr_ps_colnames] + +# Plot #1 +plot_corr_df_ps = my_gg_pairs(corr_df_ps, plot_title="Stability estimates") + +########################################################## +#================ +# Conservation +#================ +corr_conservation_cols = c( static_cols + , "ConSurf" + , "SNAP2" + , "PROVEAN" + #, aff_dist_cols + , "dst_mode" +) + +corr_df_cons = corr_plotdf[, corr_conservation_cols] + +# Plot #2 +plot_corr_df_cons = my_gg_pairs(corr_df_cons, plot_title="Conservation estimates") + +########################################################## +#================ +# Affinity: lig, ppi and na as applicable +#================ +#corr_df_lig = corr_plotdf[corr_plotdf["Lig-Dist"]10 +head(str_df) + +# replace in place affinity values >10 +str_df[str_df["ligand_distance"]>10,"affinity_scaled"]=0 +str_df[str_df["ligand_distance"]>10,"mmcsm_lig_scaled"]=0 + +#ppi2 gene: replace in place ppi2 affinity values where ppi2 dist >10 +if (tolower(gene)%in%geneL_ppi2){ + str_df[str_df["interface_dist"]>10,"mcsm_ppi2_scaled"]=0 +} + +# na gene: replace in place na affinity values where na dist >10 +if (tolower(gene)%in%geneL_na){ + str_df[str_df["nca_distance"]>10,"mcsm_na_scaled"]=0 +} + +colnames(str_df) +head(str_df) + +scaled_cols_tc = other_cols[grep("scaled", other_cols)] + + +################################################ +#=============== +# whole df +#=============== +give_col=function(x,y,df=str_df){ + df[df[[pos_colname]]==x,y] +} + +for (i in unique(str_df[[pos_colname]]) ){ + print(i) + #cat(length(unique(str_df[[pos_colname]]))) + + biggest = max(abs(give_col(i,scaled_cols_tc))) + + str_df[str_df[[pos_colname]]==i,'abs_max_effect'] = biggest + str_df[str_df[[pos_colname]]==i,'effect_type']= names( + give_col(i,scaled_cols_tc)[which( + abs( + give_col(i,scaled_cols_tc) + ) == biggest, arr.ind=T + )[, "col"]])[1] + + effect_name = unique(str_df[str_df[[pos_colname]]==i,'effect_type'])#[1] # pick first one in case we have multiple exact values + + # get index/rowname for value of max effect, and then use it to get the original sign + # here + #ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T)) + ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c(pos_colname,effect_name)][effect_name])== biggest, arr.ind=T)) + + str_df[str_df[[pos_colname]]==i,'effect_sign'] = sign(str_df[effect_name][ind,])[1] +} + +# ends with suffix 2 if dups +str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # cull duplicate effect types that happen when there are exact duplicate values +colnames(str_df) +table(str_df$effect_type) + +# check +str_df_check = str_df[str_df[[pos_colname]]%in%c(24, 32, 160, 303, 334),] + +#================ +# for Plots +#================ +str_df_short = str_df[, c("mutationinformation", + #"position", + pos_colname, + "sensitivity" + , "effect_type" + , "effect_sign")] + +table(str_df_short$effect_type) +table(str_df_short$effect_sign) +str(str_df_short) + +# assign pe outcome +str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS") +table(str_df_short$pe_outcome ) +table(str_df_short$effect_sign) + +#============== +# group effect type: +# lig, ppi2, nuc. acid, stability +#============== +affcols = c("affinity_scaled", "mmcsm_lig_scaled") +ppi2_cols = c("mcsm_ppi2_scaled") + +#lig +table(str_df_short$effect_type) +str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols + , "lig" + , str_df_short$effect_type) +table(str_df_short$effect_grouped) + +#ppi2 +str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%ppi2_cols + , "ppi2" + , str_df_short$effect_grouped) +table(str_df_short$effect_grouped) + + +#stability +str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("lig", + "ppi2" + ) + , "stability" + , str_df_short$effect_grouped) + +table(str_df_short$effect_grouped) + +# create a sign as well +str_df_short$pe_effect_outcome = paste0(str_df_short$pe_outcome, "_" + , str_df_short$effect_grouped) + +table(str_df_short$pe_effect_outcome) + +##################################################################### +# Chimera: for colouring +#################################################################### + +#------------------------------------- +# get df with unique position +#-------------------------------------- +#data[!duplicated(data$x), ] +str_df_plot = str_df_short[!duplicated(str_df[[pos_colname]]),] + +if (nrow(str_df_plot) == length(unique(str_df[[pos_colname]]))){ + cat("\nPASS: successfully extracted df with unique positions") +}else{ + stop("\nAbort: Could not extract df with unique positions") +} + +#------------------------------------- +# generate colours for effect types +#-------------------------------------- +str_df_plot_cols = str_df_plot[, c(pos_colname, + "sensitivity", + "pe_outcome", + "effect_grouped", + "pe_effect_outcome")] +head(str_df_plot_cols) + +# colour intensity based on sign +#str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$effect_sign<0, "bright", "light") +str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$pe_outcome=="DD", "bright", "light") + +table(str_df_plot_cols$colour_hue); table(str_df_plot$pe_outcome) +head(str_df_plot_cols) + +# colour based on effect +table(str_df_plot_cols$pe_effect_outcome) + +# colors = c("#ffd700" #gold +# , "#f0e68c" #khaki +# , "#da70d6"# orchid +# , "#ff1493"# deeppink +# , "#a0522d" #sienna +# , "#d2b48c" # tan +# , "#00BFC4" #, "#007d85" #blue +# , "#F8766D" )# red + +pe_colour_map = c("DD_lig" = "#ffd700" # gold + , "SS_lig" = "#f0e68c" # khaki + + , "DD_nucleic_acid"= "#a0522d" # sienna + , "SS_nucleic_acid"= "#d2b48c" # tan + + , "DD_ppi2" = "#da70d6" # orchid + , "SS_ppi2" = "#ff1493" # deeppink + + , "DD_stability" = "#f8766d" # red + , "SS_stability" = "#00BFC4") # blue + +#unlist(d[c('a', 'a', 'c', 'b')], use.names=FALSE) + +#map the colours +str_df_plot_cols$colour_map= unlist(map(str_df_plot_cols$pe_effect_outcome + ,function(x){pe_colour_map[[x]]} + )) +head(str_df_plot_cols$colour_map) +table(str_df_plot_cols$colour_map) +table(str_df_plot_cols$pe_effect_outcome) + +# str_df_plot_cols$colours = paste0(str_df_plot_cols$colour_hue +# , "_" +# , str_df_plot_cols$colour_map) +# head(str_df_plot_cols$colours) +# table(str_df_plot_cols$colours) +# +# +# class(str_df_plot_cols$colour_map) +# str(str_df_plot_cols) + +# sort by colour +head(str_df_plot_cols) +str_df_plot_cols = str_df_plot_cols[order(str_df_plot_cols$colour_map), ] +head(str_df_plot_cols) + +#====================================== +# write file with prominent effects +#====================================== +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +write.csv(str_df_plot_cols, paste0(outdir_images, tolower(gene), "_prominent_effects.csv")) + +################################ +# printing for chimera +############################### +chain_suffix = ".A" +str_df_plot_cols$pos_chain = paste0(str_df_plot_cols[[pos_colname]], chain_suffix) +table(str_df_plot_cols$colour_map) +table(str_df_plot_cols$pe_effect_outcome) + +#=================================================== +#------------------- +# Ligand Affinity +#------------------- +# -ve Lig Aff +dd_lig = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_lig",] +if (nrow(dd_lig) == table(str_df_plot_cols$pe_effect_outcome)[['DD_lig']]){ + dd_lig_pos = dd_lig[[pos_colname]] +}else{ + stop("Abort: DD affinity colour numbers mismtatch") +} + +# +ve Lig Aff +ss_lig = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_lig",] +if (!empty(ss_lig)){ + if (nrow(ss_lig) == table(str_df_plot_cols$pe_effect_outcome)[['SS_lig']]){ + ss_lig_pos = ss_lig[[pos_colname]] + }else{ + stop("Abort: SS affinity colour numbers mismtatch") + } + #put in chimera cmd + paste0(dd_lig_pos, chain_suffix) + paste0(ss_lig_pos, chain_suffix) +} + + + + +#=================================================== +#------------------- +# PPI2 Affinity +#------------------- +# -ve PPI2 +dd_ppi2 = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_ppi2",] +if (nrow(dd_ppi2) == table(str_df_plot_cols$pe_effect_outcome)[['DD_ppi2']]){ + dd_ppi2_pos = dd_ppi2[[pos_colname]] +}else{ + stop("Abort: DD PPI2 colour numbers mismtatch") +} + +# +ve PPI2 +ss_ppi2 = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_ppi2",] +if (nrow(ss_ppi2) == table(str_df_plot_cols$pe_effect_outcome)[['SS_ppi2']]){ + ss_ppi2_pos = ss_ppi2[[pos_colname]] +}else{ + stop("Abort: SS PPI2 colour numbers mismtatch") +} + +#put in chimera cmd +paste0(dd_ppi2_pos,chain_suffix) +paste0(ss_ppi2_pos,chain_suffix) + +#========================================================= +#------------------------ +# Stability +#------------------------ +# -ve Stability +dd_stability = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_stability",] +if (nrow(dd_stability) == table(str_df_plot_cols$pe_effect_outcome)[['DD_stability']]){ + dd_stability_pos = dd_stability[[pos_colname]] +}else{ + stop("Abort: DD Stability colour numbers mismtatch") +} + +# +ve Stability +ss_stability = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_stability",] +if (nrow(ss_stability) == table(str_df_plot_cols$pe_effect_outcome)[['SS_stability']]){ + ss_stability_pos = ss_stability[[pos_colname]] +}else{ + stop("Abort: SS Stability colour numbers mismtatch") +} + +#put in chimera cmd +# stabiliting first as it has less numbers +paste0(ss_stability_pos, chain_suffix) +paste0(dd_stability_pos, chain_suffix) +#################################################################### + diff --git a/scripts/plotting/plotting_thesis/alr/sensitivity_count_alr.R b/scripts/plotting/plotting_thesis/alr/sensitivity_count_alr.R new file mode 100644 index 0000000..66caef2 --- /dev/null +++ b/scripts/plotting/plotting_thesis/alr/sensitivity_count_alr.R @@ -0,0 +1,65 @@ +#========================= +# Count Sensitivity +# Mutations and positions +#========================= +pos_colname_c ="position" + +sensP_df = merged_df3[,c("mutationinformation", + #"position", + pos_colname_c, + "sensitivity")] + +head(sensP_df) +table(sensP_df$sensitivity) + +#--------------- +# Total unique positions +#---------------- +tot_mut_pos = length(unique(sensP_df[[pos_colname_c]])) +cat("\nNo of Tot muts sites:", tot_mut_pos) + +# resistant mut pos +sens_site_allR = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="R"] +sens_site_UR = unique(sens_site_allR) +length(sens_site_UR) + +# Sensitive mut pos +sens_site_allS = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="S"] +sens_site_US = unique(sens_site_allS) +length(sens_site_UR) + +#--------------- +# Common Sites +#---------------- +common_pos = intersect(sens_site_UR,sens_site_US) +site_Cc = length(common_pos) +cat("\nNo of Common sites:", site_Cc + , "\nThese are:", common_pos) + +#--------------- +# Resistant muts +#---------------- +site_R = sens_site_UR[!sens_site_UR%in%common_pos] +site_Rc = length(site_R) + +if ( length(sens_site_allR) == table(sensP_df$sensitivity)[['R']] ){ + cat("\nNo of R muts:", length(sens_site_allR) + , "\nNo. of R sites:",site_Rc + , "\nThese are:", site_R +) +} + +#--------------- +# Sensitive muts +#---------------- +site_S = sens_site_US[!sens_site_US%in%common_pos] +site_Sc = length(site_S) + +if ( length(sens_site_allS) == table(sensP_df$sensitivity)[['S']] ){ + cat("\nNo of S muts:", length(sens_site_allS) + , "\nNo. of S sites:", site_Sc + , "\nThese are:", site_S) +} + +######################### + diff --git a/scripts/plotting/plotting_thesis/gid/basic_barplots_gid.R b/scripts/plotting/plotting_thesis/gid/basic_barplots_gid.R new file mode 100644 index 0000000..add5194 --- /dev/null +++ b/scripts/plotting/plotting_thesis/gid/basic_barplots_gid.R @@ -0,0 +1,363 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: Barplots +# basic barplots with outcome +# basic barplots with frequency of count of mutations +######################################################### +#============= +# Data: Input +#============== +#source("~/git/LSHTM_analysis/config/gid.R") +#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") +#cat("\nSourced plotting cols as well:", length(plotting_cols)) + +#################################################### +class(merged_df3) + +df3 = subset(merged_df3, select = -c(pos_count)) + +#======= +# output +#======= +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +cat("plots will output to:", outdir_images) + +########################################################## +# blue, red bp +sts = 8 +lts = 8 +ats = 8 +als = 8 +ltis = 8 +geom_ls = 2.2 + +#pos_count +subtitle_size = 8 +geom_ls_pc = 2.2 +leg_text_size = 8 +axis_text_size = 8 +axis_label_size = 8 + +########################################################### +#------------------------------ +# plot default sizes +#------------------------------ +#========================= +# Affinity outcome +# check this var: outcome_cols_affinity +# get from preformatting or put in globals +#========================== +DistCutOff +LigDist_colname # = "ligand_distance" # from globals +ppi2Dist_colname +naDist_colname + +########################################################### +# get plotting data within the distance +df3_lig = df3[df3[[LigDist_colname]]DistCutOff,"mCSM-NA"]=0 + corr_affinity_df[corr_affinity_df["NA-Dist"]>DistCutOff,"mCSM-NA"]=0 } # count 0 @@ -97,6 +96,7 @@ corr_df_ps = corr_plotdf[, corr_ps_colnames] # Plot #1 plot_corr_df_ps = my_gg_pairs(corr_df_ps, plot_title="Stability estimates") + ########################################################## #================ # Conservation @@ -142,37 +142,62 @@ corr_df_aff = corr_affinity_df[, corr_aff_colnames] colnames(corr_df_aff) # Plot #3 -plot_corr_df_aff = my_gg_pairs(corr_df_aff, - plot_title="Affinity estimates", - tt_args_size = 4, - gp_args_size = 4) +plot_corr_df_aff = my_gg_pairs(corr_df_aff + , plot_title="Affinity estimates" + #, tt_args_size = 4 + #, gp_args_size = 4 + ) -#============= -# combine -#============= +#### Combine plots ##### +# #png("/home/tanu/tmp/gg_pairs_all.png", height = 6, width=11.75, unit="in",res=300) +# png(paste0(outdir_images +# ,tolower(gene) +# ,"_CorrAB.png"), height = 6, width=11.75, unit="in",res=300) +# +# cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps), +# ggmatrix_gtable(plot_corr_df_cons), +# # ggmatrix_gtable(plot_corr_df_aff), +# # nrow=1, ncol=3, rel_heights = 7,7,3 +# nrow=1, +# #rel_heights = 1,1 +# labels = "AUTO", +# label_size = 12) +# dev.off() +# +# # affinity corr +# #png("/home/tanu/tmp/gg_pairs_affinity.png", height =7, width=7, unit="in",res=300) +# png(paste0(outdir_images +# ,tolower(gene) +# ,"_CorrC.png"), height =7, width=7, unit="in",res=300) +# +# cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_aff), +# labels = "C", +# label_size = 12) +# dev.off() -#png("/home/tanu/tmp/gg_pairs_all.png", height = 6, width=11.75, unit="in",res=300) +#### Combine A #### png(paste0(outdir_images ,tolower(gene) - ,"_CorrAB.png"), height = 7, width=11.75, unit="in",res=300) + ,"_CorrA.png"), height =8, width=8, unit="in",res=300) cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_ps), - ggmatrix_gtable(plot_corr_df_cons), + labels = "A", + label_size = 12) +dev.off() + +#### Combine B+C #### +# B + C +png(paste0(outdir_images + ,tolower(gene) + ,"_CorrBC.png"), height = 6, width=11.75, unit="in",res=300) + +cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_cons), + ggmatrix_gtable(plot_corr_df_aff), # ggmatrix_gtable(plot_corr_df_aff), # nrow=1, ncol=3, rel_heights = 7,7,3 nrow=1, - rel_widths = c(1.5,1), - labels = "AUTO", + #rel_heights = 1,1 + labels = c("B", "C"), label_size = 12) dev.off() -# affinity corr -#png("/home/tanu/tmp/gg_pairs_affinity.png", height =7, width=7, unit="in",res=300) -png(paste0(outdir_images - ,tolower(gene) - ,"_CorrC.png"), height =7, width=7, unit="in",res=300) - -cowplot::plot_grid(ggmatrix_gtable(plot_corr_df_aff), - labels = "C", - label_size = 12) -dev.off() diff --git a/scripts/plotting/plotting_thesis/gid/gid_other_plots.R b/scripts/plotting/plotting_thesis/gid/gid_other_plots.R new file mode 100644 index 0000000..80d95e5 --- /dev/null +++ b/scripts/plotting/plotting_thesis/gid/gid_other_plots.R @@ -0,0 +1,2 @@ + +source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R") diff --git a/scripts/plotting/plotting_thesis/gid/pe_sens_site_count_gid.R b/scripts/plotting/plotting_thesis/gid/pe_sens_site_count_gid.R new file mode 100644 index 0000000..c8ed1e4 --- /dev/null +++ b/scripts/plotting/plotting_thesis/gid/pe_sens_site_count_gid.R @@ -0,0 +1,173 @@ +source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/gid/prominent_effects_gid.R") +source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/gid/sensitivity_count_gid.R") + +############################################################## +# PE count +#lig-- na--ppi2--stab +# pe_colour_map = c("DD_lig" = "#ffd700" # gold +# , "SS_lig" = "#f0e68c" # khaki +# +# , "DD_nucleic_acid"= "#a0522d" # sienna +# , "SS_nucleic_acid"= "#d2b48c" # tan +# +# , "DD_ppi2" = "#da70d6" # orchid +# , "SS_ppi2" = "#ff1493" # deeppink +# +# , "DD_stability" = "#f8766d" # red +# , "SS_stability" = "#00BFC4") # blue +table(str_df_plot_cols$pe_effect_outcome) +############################################################## +#=========== +#PE count +#=========== +rects <- data.frame(x=1:6, + colors = c("#ffd700" , + "#f0e68c" , + + "#a0522d" , + "#d2b48c" , + + "#f8766d" , + "#00BFC4") + ) + +rects$text = c("-ve Lig" + , "+ve Lig" + + , "-ve\nNuc.Acid" + , "+ve\nNuc.Acid" + + , "-ve stability" + , "+ve stability" +) + +cell1 = table(str_df_plot_cols$pe_effect_outcome)[["DD_lig"]] +cell2 = 0 + +cell3 = table(str_df_plot_cols$pe_effect_outcome)[["DD_nucleic_acid"]] +cell4 = table(str_df_plot_cols$pe_effect_outcome)[["SS_nucleic_acid"]] + +#cell5 = table(str_df_plot_cols$pe_effect_outcome)[["DD_ppi2"]] +#cell6 = table(str_df_plot_cols$pe_effect_outcome)[["SS_ppi2"]] + +cell7 = table(str_df_plot_cols$pe_effect_outcome)[["DD_stability"]] +cell8 = table(str_df_plot_cols$pe_effect_outcome)[["SS_stability"]] + + +#rects$numbers = c(38, 0, 22, 9, 108, 681) #for embb +rects$numbers = c(cell1, cell2, + cell3, cell4, + # cell5, cell6, + cell7, cell8) + +rects$num_labels = paste0("n=", rects$numbers) + +rects +#------ +# Plot +#------ +#https://stackoverflow.com/questions/47986055/create-a-rectangle-filled-with-text +peP = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) + + geom_tile(width = 1, height = 1) + # make square tiles + geom_text(color = "black", size = 1.7) + # add white text in the middle + scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame + coord_fixed() + # make sure tiles are square + coord_flip()+ scale_x_reverse() + + # theme_void() # remove any axis markings + theme_nothing() # remove any axis markings +peP + +#------ +# Plot: this one is better +#------ +peP2 = ggplot(rects, aes(x, y = 0, fill = colors, label = paste0(text,"\n", num_labels))) + + geom_tile() + # make square tiles + geom_text(color = "black", size = 1.6) + # add white text in the middle + scale_fill_identity(guide = "none") + # color the tiles with the colors in the data frame + coord_fixed() + # make sure tiles are square + theme_nothing() # remove any axis markings +peP2 + +######################################################## +# From: script sensitivity_count per gene +#=============================== +# Sensitivity count: SITE +#=============================== +#-------- +# embb +#-------- +#rsc = 54 +#ccc = 46 +#ssc = 470 + +rsc = site_Rc; rsc +ccc = site_Cc; ccc +ssc = site_Sc; ssc + +rect_rs_siteC <- data.frame(x=1:3, + colors = c("red", + "purple", + "blue") + ) + +rect_rs_siteC +rect_rs_siteC$text = c("Resistant", + "Common", + "Sensitive") + +rect_rs_siteC$numbers = c(rsc,ccc,ssc) +rect_rs_siteC$num_labels = paste0("n=", rect_rs_siteC$numbers) +rect_rs_siteC + +#------ +# Plot +#------ +sens_siteP = ggplot(rect_rs_siteC, aes(x, y = 0, + fill = colors, + label = num_labels + #,label = paste0(text,"\n", num_labels) + )) + + geom_tile(width = 1, height = 1) + + #geom_text(color = "black", size = 1.7) + + geom_label(color = "black", size = 1.7,fill = "white", alpha=0.7) + + scale_fill_identity(guide = "none") + + coord_fixed()+ + theme_nothing() # remove any axis markings +sens_siteP + +################################################################ +#=============================== +# Sensitivity count: Mutations +#=============================== +table(sensP_df$sensitivity) +muts_Rc = table(sensP_df$sensitivity)[["R"]] +muts_Sc = table(sensP_df$sensitivity)[["S"]] +rect_sens <- data.frame(x=1:2, + colors = c("red", + "blue") + ) + +rect_sens$text = c("Resistant", + "Sensitive") +rect_sens$numbers = c(muts_Rc,muts_Sc) +rect_sens$num_labels = paste0("n=", rect_sens$numbers) +rect_sens +#------ +# Plot +#------ +sensP = ggplot(rect_sens, aes(x, y = 0, + fill = colors, + label = paste0(text,"\n", num_labels))) + + geom_tile(width = 1, height = 1) + + #geom_text(color = "black", size = 1.7) + + geom_label(color = "black", size = 1.7,fill = "white", alpha=0.7) + + scale_fill_identity(guide = "none") + + coord_fixed()+ + theme_nothing() # remove any axis markings +sensP + +sensP2 = sensP + + coord_flip() + scale_x_reverse() +sensP2 + + diff --git a/scripts/plotting/plotting_thesis/gid/prominent_effects_gid.R b/scripts/plotting/plotting_thesis/gid/prominent_effects_gid.R new file mode 100644 index 0000000..db82d86 --- /dev/null +++ b/scripts/plotting/plotting_thesis/gid/prominent_effects_gid.R @@ -0,0 +1,341 @@ +#!/usr/bin/env Rscript +#source("~/git/LSHTM_analysis/config/gid.R") +# get plotting dfs +#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") +######################################################## +pos_colname = "position" + +#------------- +# from ~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R +#------------- +length(all_stability_cols); length(raw_stability_cols) +length(scaled_stability_cols); length(outcome_stability_cols) +length(affinity_dist_colnames) + + +static_cols = c("mutationinformation", + #"position", + pos_colname, + "sensitivity") + +other_cols_all = c(scaled_stability_cols, scaled_affinity_cols, affinity_dist_colnames) + +#omit avg cols and foldx_scaled_signC cols +other_cols = other_cols_all[grep("avg", other_cols_all, invert = T)] +other_cols = other_cols[grep("foldx_scaled_signC",other_cols, invert = T )] +other_cols + +cols_to_extract = c(static_cols, other_cols) +cat("\nExtracting cols:", cols_to_extract) +expected_ncols = length(static_cols) + length(other_cols) +expected_ncols + +str_df = merged_df3[, cols_to_extract] + +if (ncol(str_df) == expected_ncols){ + cat("\nPASS: successfully extracted cols for calculating prominent effects") +}else{ + stop("\nAbort: Could not extract cols for calculating prominent effects") +} + +#========================= +# Masking affinity columns +#========================= +# First make values for affinity cols 0 when their corresponding dist >10 +head(str_df) + +# replace in place affinity values >10 +str_df[str_df["ligand_distance"]>10,"affinity_scaled"]=0 +str_df[str_df["ligand_distance"]>10,"mmcsm_lig_scaled"]=0 + +#ppi2 gene: replace in place ppi2 affinity values where ppi2 dist >10 +if (tolower(gene)%in%geneL_ppi2){ + str_df[str_df["interface_dist"]>10,"mcsm_ppi2_scaled"]=0 +} + +# na gene: replace in place na affinity values where na dist >10 +if (tolower(gene)%in%geneL_na){ + str_df[str_df["nca_distance"]>10,"mcsm_na_scaled"]=0 +} + +colnames(str_df) +head(str_df) + +# get names of cols to calculate the prominent effects from +# scaled_cols_tc = c("duet_scaled", +# "deepddg_scaled", +# "ddg_dynamut2_scaled", +# "foldx_scaled", +# "affinity_scaled", +# "mmcsm_lig_scaled", +# "mcsm_ppi2_scaled", "mcsm_na_scaled") + +scaled_cols_tc = other_cols[grep("scaled", other_cols)] +############################################### + + +#=============== +# whole df +#=============== +give_col=function(x,y,df=str_df){ + df[df[[pos_colname]]==x,y] +} + +for (i in unique(str_df[[pos_colname]]) ){ + print(i) + #cat(length(unique(str_df[[pos_colname]]))) + + biggest = max(abs(give_col(i,scaled_cols_tc))) + + str_df[str_df[[pos_colname]]==i,'abs_max_effect'] = biggest + str_df[str_df[[pos_colname]]==i,'effect_type']= names( + give_col(i,scaled_cols_tc)[which( + abs( + give_col(i,scaled_cols_tc) + ) == biggest, arr.ind=T + )[, "col"]])[1] + + effect_name = unique(str_df[str_df[[pos_colname]]==i,'effect_type'])#[1] # pick first one in case we have multiple exact values + + # get index/rowname for value of max effect, and then use it to get the original sign + # here + #ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T)) + ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c(pos_colname,effect_name)][effect_name])== biggest, arr.ind=T)) + + str_df[str_df[[pos_colname]]==i,'effect_sign'] = sign(str_df[effect_name][ind,])[1] +} + +# ends with suffix 2 if dups +str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # cull duplicate effect types that happen when there are exact duplicate values +colnames(str_df) +table(str_df$effect_type) + +# check +str_df_check = str_df[str_df[[pos_colname]]%in%c(24, 32, 160, 303, 334),] + +#================ +# for Plots +#================ +str_df_short = str_df[, c("mutationinformation", + #"position", + pos_colname, + "sensitivity" + , "effect_type" + , "effect_sign")] + +table(str_df_short$effect_type) +table(str_df_short$effect_sign) +str(str_df_short) + +# assign pe outcome +str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS") +table(str_df_short$pe_outcome ) +table(str_df_short$effect_sign) + +#============== +# group effect type: +# lig, ppi2, nuc. acid, stability +#============== +affcols = c("affinity_scaled", "mmcsm_lig_scaled") +nuc_na_cols = c("mcsm_na_scaled") + +#lig +table(str_df_short$effect_type) +str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols + , "lig" + , str_df_short$effect_type) +table(str_df_short$effect_grouped) + + +#na +str_df_short$effect_grouped = ifelse(str_df_short$effect_grouped%in%nuc_na_cols + , "nucleic_acid" + , str_df_short$effect_grouped) +table(str_df_short$effect_grouped) + +#stability +str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("lig", + "nucleic_acid") + , "stability" + , str_df_short$effect_grouped) + +table(str_df_short$effect_grouped) + +# create a sign as well +str_df_short$pe_effect_outcome = paste0(str_df_short$pe_outcome, "_" + , str_df_short$effect_grouped) + +table(str_df_short$pe_effect_outcome) + +##################################################################### +# Chimera: for colouring +#################################################################### + +#------------------------------------- +# get df with unique position +#-------------------------------------- +#data[!duplicated(data$x), ] +str_df_plot = str_df_short[!duplicated(str_df[[pos_colname]]),] + +if (nrow(str_df_plot) == length(unique(str_df[[pos_colname]]))){ + cat("\nPASS: successfully extracted df with unique positions") +}else{ + stop("\nAbort: Could not extract df with unique positions") +} + +#------------------------------------- +# generate colours for effect types +#-------------------------------------- +str_df_plot_cols = str_df_plot[, c(pos_colname, + "sensitivity", + "pe_outcome", + "effect_grouped", + "pe_effect_outcome")] +head(str_df_plot_cols) + +# colour intensity based on sign +#str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$effect_sign<0, "bright", "light") +str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$pe_outcome=="DD", "bright", "light") + +table(str_df_plot_cols$colour_hue); table(str_df_plot$pe_outcome) +head(str_df_plot_cols) + +# colour based on effect +table(str_df_plot_cols$pe_effect_outcome) + +# colors = c("#ffd700" #gold +# , "#f0e68c" #khaki +# , "#da70d6"# orchid +# , "#ff1493"# deeppink +# , "#a0522d" #sienna +# , "#d2b48c" # tan +# , "#00BFC4" #, "#007d85" #blue +# , "#F8766D" )# red + +pe_colour_map = c("DD_lig" = "#ffd700" # gold + , "SS_lig" = "#f0e68c" # khaki + + , "DD_nucleic_acid"= "#a0522d" # sienna + , "SS_nucleic_acid"= "#d2b48c" # tan + + , "DD_ppi2" = "#da70d6" # orchid + , "SS_ppi2" = "#ff1493" # deeppink + + , "DD_stability" = "#f8766d" # red + , "SS_stability" = "#00BFC4") # blue + +#unlist(d[c('a', 'a', 'c', 'b')], use.names=FALSE) + +#map the colours +str_df_plot_cols$colour_map= unlist(map(str_df_plot_cols$pe_effect_outcome + ,function(x){pe_colour_map[[x]]} + )) +head(str_df_plot_cols$colour_map) +table(str_df_plot_cols$colour_map) +table(str_df_plot_cols$pe_effect_outcome) + +# str_df_plot_cols$colours = paste0(str_df_plot_cols$colour_hue +# , "_" +# , str_df_plot_cols$colour_map) +# head(str_df_plot_cols$colours) +# table(str_df_plot_cols$colours) +# +# +# class(str_df_plot_cols$colour_map) +# str(str_df_plot_cols) + +# sort by colour +head(str_df_plot_cols) +str_df_plot_cols = str_df_plot_cols[order(str_df_plot_cols$colour_map), ] +head(str_df_plot_cols) + +#====================================== +# write file with prominent effects +#====================================== +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +write.csv(str_df_plot_cols, paste0(outdir_images, tolower(gene), "_prominent_effects.csv")) + +################################ +# printing for chimera +############################### +chain_suffix = ".A" +str_df_plot_cols$pos_chain = paste0(str_df_plot_cols[[pos_colname]], chain_suffix) +table(str_df_plot_cols$colour_map) +table(str_df_plot_cols$pe_effect_outcome) + +#=================================================== +#------------------- +# Ligand Affinity +#------------------- +# -ve Lig Aff +dd_lig = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_lig",] +if (nrow(dd_lig) == table(str_df_plot_cols$pe_effect_outcome)[['DD_lig']]){ + dd_lig_pos = dd_lig[[pos_colname]] +}else{ + stop("Abort: DD affinity colour numbers mismtatch") +} + +# +ve Lig Aff +ss_lig = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_lig",] +if (!empty(ss_lig)){ +if (nrow(ss_lig) == table(str_df_plot_cols$pe_effect_outcome)[['SS_lig']]){ + ss_lig_pos = ss_lig[[pos_colname]] +}else{ + stop("Abort: SS affinity colour numbers mismtatch") +} + #put in chimera cmd + paste0(dd_lig_pos,chain_suffix) + paste0(ss_lig_pos,chain_suffix) + +} + +#=================================================== +#------------------------ +# Nucleic Acid Affinity +#------------------------ +# -ve NA aff +dd_nca = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_nucleic_acid",] +if (nrow(dd_nca) == table(str_df_plot_cols$pe_effect_outcome)[['DD_nucleic_acid']]){ + dd_nca_pos = dd_nca[[pos_colname]] +}else{ + stop("Abort: DD nucleic_acid colour numbers mismtatch") +} + +# +ve NA aff +ss_nca = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_nucleic_acid",] +if (nrow(ss_nca) == table(str_df_plot_cols$pe_effect_outcome)[['SS_nucleic_acid']]){ + ss_nca_pos = ss_nca[[pos_colname]] +}else{ + stop("Abort: SS nucleic_acid colour numbers mismtatch") +} + +#put in chimera cmd +paste0(dd_nca_pos, chain_suffix) +paste0(ss_nca_pos, chain_suffix) + +#========================================================= +#------------------------ +# Stability +#------------------------ +# -ve Stability +dd_stability = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_stability",] +if (nrow(dd_stability) == table(str_df_plot_cols$pe_effect_outcome)[['DD_stability']]){ + dd_stability_pos = dd_stability[[pos_colname]] +}else{ + stop("Abort: DD Stability colour numbers mismtatch") +} + +# +ve Stability +ss_stability = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_stability",] +if (nrow(ss_stability) == table(str_df_plot_cols$pe_effect_outcome)[['SS_stability']]){ + ss_stability_pos = ss_stability[[pos_colname]] +}else{ + stop("Abort: SS Stability colour numbers mismtatch") +} + +#put in chimera cmd +# stabiliting first as it has less numbers +paste0(ss_stability_pos, chain_suffix) +paste0(dd_stability_pos, chain_suffix) +#################################################################### + diff --git a/scripts/plotting/plotting_thesis/gid/sensitivity_count_gid.R b/scripts/plotting/plotting_thesis/gid/sensitivity_count_gid.R new file mode 100644 index 0000000..66caef2 --- /dev/null +++ b/scripts/plotting/plotting_thesis/gid/sensitivity_count_gid.R @@ -0,0 +1,65 @@ +#========================= +# Count Sensitivity +# Mutations and positions +#========================= +pos_colname_c ="position" + +sensP_df = merged_df3[,c("mutationinformation", + #"position", + pos_colname_c, + "sensitivity")] + +head(sensP_df) +table(sensP_df$sensitivity) + +#--------------- +# Total unique positions +#---------------- +tot_mut_pos = length(unique(sensP_df[[pos_colname_c]])) +cat("\nNo of Tot muts sites:", tot_mut_pos) + +# resistant mut pos +sens_site_allR = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="R"] +sens_site_UR = unique(sens_site_allR) +length(sens_site_UR) + +# Sensitive mut pos +sens_site_allS = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="S"] +sens_site_US = unique(sens_site_allS) +length(sens_site_UR) + +#--------------- +# Common Sites +#---------------- +common_pos = intersect(sens_site_UR,sens_site_US) +site_Cc = length(common_pos) +cat("\nNo of Common sites:", site_Cc + , "\nThese are:", common_pos) + +#--------------- +# Resistant muts +#---------------- +site_R = sens_site_UR[!sens_site_UR%in%common_pos] +site_Rc = length(site_R) + +if ( length(sens_site_allR) == table(sensP_df$sensitivity)[['R']] ){ + cat("\nNo of R muts:", length(sens_site_allR) + , "\nNo. of R sites:",site_Rc + , "\nThese are:", site_R +) +} + +#--------------- +# Sensitive muts +#---------------- +site_S = sens_site_US[!sens_site_US%in%common_pos] +site_Sc = length(site_S) + +if ( length(sens_site_allS) == table(sensP_df$sensitivity)[['S']] ){ + cat("\nNo of S muts:", length(sens_site_allS) + , "\nNo. of S sites:", site_Sc + , "\nThese are:", site_S) +} + +######################### + diff --git a/scripts/plotting/plotting_thesis/katg/basic_barplots_layput_katg.R b/scripts/plotting/plotting_thesis/katg/basic_barplots_katg_layout.R similarity index 100% rename from scripts/plotting/plotting_thesis/katg/basic_barplots_layput_katg.R rename to scripts/plotting/plotting_thesis/katg/basic_barplots_katg_layout.R diff --git a/scripts/plotting/plotting_thesis/katg/katg_other_plots.R b/scripts/plotting/plotting_thesis/katg/katg_other_plots.R new file mode 100644 index 0000000..989f8ee --- /dev/null +++ b/scripts/plotting/plotting_thesis/katg/katg_other_plots.R @@ -0,0 +1,2 @@ + +source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R") \ No newline at end of file diff --git a/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R b/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R index aae8f3d..071e0ce 100644 --- a/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R +++ b/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R @@ -7,7 +7,6 @@ source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/linage_bp #linage_dist_ens_stability.R ########################################### # svg -# my_label_size = 12 # linPlots_combined = paste0(outdir_images # , tolower(gene) # ,"_linP_combined.svg") @@ -31,6 +30,8 @@ source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/linage_bp # dev.off() # png +my_label_size = 12 + linPlots_combined = paste0(outdir_images , tolower(gene) ,"_linP_combined.png") diff --git a/scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob.R b/scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob.R index 769c32c..42919bc 100644 --- a/scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob.R +++ b/scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob.R @@ -7,16 +7,9 @@ #============= # Data: Input #============== -#source("~/git/LSHTM_analysis/config/pnca.R") -#source("~/git/LSHTM_analysis/config/embb.R") -#source("~/git/LSHTM_analysis/config/gid.R") - -#source("~/git/LSHTM_analysis/config/alr.R") -#source("~/git/LSHTM_analysis/config/katg.R") #source("~/git/LSHTM_analysis/config/rpob.R") #source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") -#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") sourced by above #cat("\nSourced plotting cols as well:", length(plotting_cols)) diff --git a/scripts/plotting/plotting_thesis/rpob/basic_barplots_layout_rpob.R b/scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob_layout.R similarity index 94% rename from scripts/plotting/plotting_thesis/rpob/basic_barplots_layout_rpob.R rename to scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob_layout.R index 43080ff..b0c0d0f 100644 --- a/scripts/plotting/plotting_thesis/rpob/basic_barplots_layout_rpob.R +++ b/scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob_layout.R @@ -1,4 +1,18 @@ -#source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/pe_sens_site_count_rpob.R") +#============= +# Data: Input +#============== +source("~/git/LSHTM_analysis/config/rpob.R") +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") +#cat("\nSourced plotting cols as well:", length(plotting_cols)) + +#======= +# output +#======= +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +cat("plots will output to:", outdir_images) + +source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/rpob/basic_barplots_rpob.R") +source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/rpob/pe_sens_site_count_rpob.R") if ( tolower(gene)%in%c("rpob") ){ cat("\nPlots available for layout are:") diff --git a/scripts/plotting/plotting_thesis/rpob/gg_pairs_all_rpob.R b/scripts/plotting/plotting_thesis/rpob/gg_pairs_all_rpob.R index 2bc7b89..f0338b2 100644 --- a/scripts/plotting/plotting_thesis/rpob/gg_pairs_all_rpob.R +++ b/scripts/plotting/plotting_thesis/rpob/gg_pairs_all_rpob.R @@ -1,6 +1,11 @@ -#source("~/git/LSHTM_analysis/config/embb.R") -#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") -#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") +source("~/git/LSHTM_analysis/config/rpob.R") +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + +#======= +# output +#======= +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +cat("plots will output to:", outdir_images) my_gg_pairs=function(plot_df, plot_title , tt_args_size = 2.5 @@ -72,7 +77,7 @@ if (tolower(gene)%in%geneL_ppi2){ } if (tolower(gene)%in%geneL_na){ - corr_affinity_df[corr_affinity_df["NCA-Dist"]>DistCutOff,"mCSM-NA"]=0 + corr_affinity_df[corr_affinity_df["NA-Dist"]>DistCutOff,"mCSM-NA"]=0 } # count 0 diff --git a/scripts/plotting/plotting_thesis/rpob/linage_bp_dist.R b/scripts/plotting/plotting_thesis/rpob/linage_bp_dist.R deleted file mode 100644 index bfe6118..0000000 --- a/scripts/plotting/plotting_thesis/rpob/linage_bp_dist.R +++ /dev/null @@ -1,172 +0,0 @@ -#!/usr/bin/env Rscript - -######################################################### -# TASK: Lineage plots [merged_df2] -# Count -# Diversity -# Average stability dist -# Avergae affinity dist: optional -######################################################### -#======= -# output -#======= -# outdir_images = paste0("~/git/Writing/thesis/images/results/" -# , tolower(gene), "/") -# cat("plots will output to:", outdir_images) -######################################################### - -#=============== -#Quick numbers checks -#=============== -nsample_lin = merged_df2[merged_df2$lineage%in%c("L1", "L2", "L3", "L4"),] - -if ( all(table(nsample_lin$sensitivity)== table(nsample_lin$mutation_info_labels)) ){ - cat("\nTotal no. of samples belonging to L1-l4 for", gene,":", nrow(nsample_lin) - , "\nCounting R and S samples") - if( sum(table(nsample_lin$sensitivity)) == nrow(nsample_lin) ){ - cat("\nPASSNumbers cross checked:") - print(table(nsample_lin$sensitivity)) - } -}else{ - stop("Abort: Numbers mismatch. Please check") -} -######################################################################## -################################################### -# Lineage barplots # -################################################### - -#=============================== -# lineage sample and SNP count -#=============================== -lin_countP = lin_count_bp(lf_data = lineage_dfL[['lin_lf']] - , all_lineages = F - , x_categ = "sel_lineages" - , y_count = "p_count" - , use_lineages = c("L1", "L2", "L3", "L4") - , bar_fill_categ = "count_categ" - , display_label_col = "p_count" - , bar_stat_stype = "identity" - , d_lab_size = 8 - , d_lab_col = "black" - , my_xats = 25 # x axis text size - , my_yats = 25 # y axis text sized_lab_size - , my_xals = 25 # x axis label size - , my_yals = 25 # y axis label size - , my_lls = 25 # legend label size - , bar_col_labels = c("SNPs", "Total Samples") - , bar_col_values = c("grey50", "gray75") - , bar_leg_name = "" - , leg_location = "top" - , y_log10 = F - , y_scale_percent = FALSE - , y_label = c("Count") - ) -lin_countP -#=============================== -# lineage SNP diversity count -#=============================== -lin_diversityP = lin_count_bp_diversity(lf_data = lineage_dfL[['lin_wf']] - , x_categ = "sel_lineages" - , y_count = "snp_diversity" - #, all_lineages = F - , use_lineages = c("L1", "L2", "L3", "L4") - , display_label_col = "snp_diversity_f" - , bar_stat_stype = "identity" - , x_lab_angle = 90 - , d_lab_size =9 - , my_xats = 25 # x axis text size - , my_yats = 25 # y axis text size - , my_xals = 25 # x axis label size - , my_yals = 25 # y axis label size - , my_lls = 25 # legend label size - , y_log10 = F - , y_scale_percent = F - , leg_location = "top" - , y_label = "Percent" #"SNP diversity" - , bp_plot_title = "nsSNP diversity" - , title_colour = "black" #"chocolate4" - , subtitle_text = NULL - , sts = 20 - , subtitle_colour = "#350E20FF") -lin_diversityP -#============================================= -# Output plots: Lineage count and Diversity -#============================================= -# lineage_bp_CL = paste0(outdir_images -# ,tolower(gene) -# ,"_lineage_bp_CL_Tall.svg") -# -# cat("Lineage barplots:", lineage_bp_CL) -# svg(lineage_bp_CL, width = 8, height = 15) -# -# cowplot::plot_grid(lin_countP, lin_diversityP -# #, labels = c("(a)", "(b)", "(c)", "(d)") -# , nrow = 2 -# # , ncols = 2 -# , labels = "AUTO" -# , label_size = 25) -# -# dev.off() -######################################################################## - - -################################################### -# Stability dist # -################################################### -# scaled_cols_stability = c("duet_scaled" -# , "deepddg_scaled" -# , "ddg_dynamut2_scaled" -# , "foldx_scaled" -# , "avg_stability_scaled") - -my_xlabel = paste0("Average stability ", "(", stability_suffix, ")"); my_xlabel -#plotdf = merged_df2[merged_df2$lineage%in%c("L1", "L2", "L3", "L4"),] - -linP_dm_om = lineage_distP(merged_df2 - , with_facet = F - , x_axis = "avg_stability_scaled" - , y_axis = "lineage_labels" - , x_lab = my_xlabel - , use_lineages = c("L1", "L2", "L3", "L4") - #, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999") - , fill_categ = "sensitivity" - , fill_categ_cols = c("red", "blue") - , label_categories = c("Resistant", "Sensitive") - , leg_label = "Mutation group" - , my_ats = 22 # axis text size - , my_als = 22 # axis label size - , my_leg_ts = 22 - , my_leg_title = 22 - , my_strip_ts = 22 - , alpha = 0.56 -) - -linP_dm_om - -################################################### -# Affinity dist [OPTIONAL] # -################################################### -# scaled_cols_affinity = c("affinity_scaled" -# , "mmcsm_lig_scaled" -# , "mcsm_ppi2_scaled" -# , "mcsm_na_scaled" -# , "avg_lig_affinity_scaled") - -# lineage_distP(merged_df2 -# , with_facet = F -# , x_axis = "avg_lig_affinity_scaled" -# , y_axis = "lineage_labels" -# , x_lab = my_xlabel -# , use_lineages = c("L1", "L2", "L3", "L4") -# #, fill_categ = "mutation_info_orig", fill_categ_cols = c("#E69F00", "#999999") -# , fill_categ = "sensitivity" -# , fill_categ_cols = c("red", "blue") -# , label_categories = c("Resistant", "Sensitive") -# , leg_label = "Mutation group" -# , my_ats = 22 # axis text size -# , my_als = 22 # axis label size -# , my_leg_ts = 22 -# , my_leg_title = 22 -# , my_strip_ts = 22 -# , alpha = 0.56 -# ) diff --git a/scripts/plotting/plotting_thesis/rpob/linage_bp_dist_layout.R b/scripts/plotting/plotting_thesis/rpob/linage_bp_dist_layout.R deleted file mode 100644 index 00f3adc..0000000 --- a/scripts/plotting/plotting_thesis/rpob/linage_bp_dist_layout.R +++ /dev/null @@ -1,30 +0,0 @@ -#!/usr/bin/env Rscript - -########################################### -# TASK: generate plots for lineage -# Individual plots in -#lineage_bp_both.R -#linage_dist_ens_stability.R -########################################### -my_label_size = 25 - -linPlots_combined = paste0(outdir_images - , tolower(gene) - ,"_linP_combined.svg") - -cat("\nOutput plot:", linPlots_combined) -svg(linPlots_combined, width = 18, height = 12) - -cowplot::plot_grid( - cowplot::plot_grid(lin_countP, lin_diversityP - , nrow = 2 - , labels = "AUTO" - , label_size = my_label_size), - NULL, - linP_dm_om, - nrow = 1, - labels = c("", "", "C"), - label_size = my_label_size, - rel_widths = c(35, 3, 52) -) -dev.off() \ No newline at end of file diff --git a/scripts/plotting/plotting_thesis/rpob/pe_sens_site_count_rpob.R b/scripts/plotting/plotting_thesis/rpob/pe_sens_site_count_rpob.R index 741d7fe..f2a6c32 100644 --- a/scripts/plotting/plotting_thesis/rpob/pe_sens_site_count_rpob.R +++ b/scripts/plotting/plotting_thesis/rpob/pe_sens_site_count_rpob.R @@ -1,5 +1,5 @@ -source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/prominent_effects_rpob.R") -source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/sensitivity_count_rpob.R") +source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/rpob/prominent_effects_rpob.R") +source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/rpob/sensitivity_count_rpob.R") ############################################################## # PE count diff --git a/scripts/plotting/plotting_thesis/rpob/rpob_other_plots.R b/scripts/plotting/plotting_thesis/rpob/rpob_other_plots.R new file mode 100644 index 0000000..989f8ee --- /dev/null +++ b/scripts/plotting/plotting_thesis/rpob/rpob_other_plots.R @@ -0,0 +1,2 @@ + +source("/home/tanu/git/LSHTM_analysis/scripts/plotting/plotting_thesis/linage_bp_dist_layout.R") \ No newline at end of file