diff --git a/scripts/plotting/plotting_thesis/pnca/basic_barplots_pnca.R b/scripts/plotting/plotting_thesis/pnca/basic_barplots_pnca.R new file mode 100644 index 0000000..ef114a1 --- /dev/null +++ b/scripts/plotting/plotting_thesis/pnca/basic_barplots_pnca.R @@ -0,0 +1,364 @@ +#!/usr/bin/env Rscript +######################################################### +# TASK: Barplots +# basic barplots with outcome +# basic barplots with frequency of count of mutations +######################################################### +#============= +# Data: Input +#============== +#source("~/git/LSHTM_analysis/config/pnca.R") +#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + +#cat("\nSourced plotting cols as well:", length(plotting_cols)) + +#################################################### +class(merged_df3) + +df3 = subset(merged_df3, select = -c(pos_count)) + +#======= +# output +#======= +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +cat("plots will output to:", outdir_images) + +########################################################## +# blue, red bp +sts = 8 +lts = 8 +ats = 8 +als = 8 +ltis = 8 +geom_ls = 2.2 + +#pos_count +subtitle_size = 8 +geom_ls_pc = 2.2 +leg_text_size = 8 +axis_text_size = 8 +axis_label_size = 8 + +########################################################### +#------------------------------ +# plot default sizes +#------------------------------ +#========================= +# Affinity outcome +# check this var: outcome_cols_affinity +# get from preformatting or put in globals +#========================== +DistCutOff +LigDist_colname # = "ligand_distance" # from globals +ppi2Dist_colname +naDist_colname + +########################################################### +# get plotting data within the distance +df3_lig = df3[df3[[LigDist_colname]]10 +head(str_df) + +# replace in place affinity values >10 +str_df[str_df["ligand_distance"]>10,"affinity_scaled"]=0 +str_df[str_df["ligand_distance"]>10,"mmcsm_lig_scaled"]=0 + +#ppi2 gene: replace in place ppi2 affinity values where ppi2 dist >10 +if (tolower(gene)%in%geneL_ppi2){ + str_df[str_df["interface_dist"]>10,"mcsm_ppi2_scaled"]=0 +} + +# na gene: replace in place na affinity values where na dist >10 +if (tolower(gene)%in%geneL_na){ + str_df[str_df["nca_distance"]>10,"mcsm_na_scaled"]=0 +} + +colnames(str_df) +head(str_df) + +scaled_cols_tc = other_cols[grep("scaled", other_cols)] + + +################################################ +#=============== +# whole df +#=============== +give_col=function(x,y,df=str_df){ + df[df[[pos_colname]]==x,y] +} + +for (i in unique(str_df[[pos_colname]]) ){ + print(i) + #cat(length(unique(str_df[[pos_colname]]))) + + biggest = max(abs(give_col(i,scaled_cols_tc))) + + str_df[str_df[[pos_colname]]==i,'abs_max_effect'] = biggest + str_df[str_df[[pos_colname]]==i,'effect_type']= names( + give_col(i,scaled_cols_tc)[which( + abs( + give_col(i,scaled_cols_tc) + ) == biggest, arr.ind=T + )[, "col"]])[1] + + effect_name = unique(str_df[str_df[[pos_colname]]==i,'effect_type'])#[1] # pick first one in case we have multiple exact values + + # get index/rowname for value of max effect, and then use it to get the original sign + # here + #ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T)) + ind = rownames(which(abs(str_df[str_df[[pos_colname]]==i,c(pos_colname,effect_name)][effect_name])== biggest, arr.ind=T)) + + str_df[str_df[[pos_colname]]==i,'effect_sign'] = sign(str_df[effect_name][ind,])[1] +} + +# ends with suffix 2 if dups +str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # cull duplicate effect types that happen when there are exact duplicate values +colnames(str_df) +table(str_df$effect_type) + +# check +str_df_check = str_df[str_df[[pos_colname]]%in%c(24, 32, 160, 303, 334),] + +#================ +# for Plots +#================ +str_df_short = str_df[, c("mutationinformation", + #"position", + pos_colname, + "sensitivity" + , "effect_type" + , "effect_sign")] + +table(str_df_short$effect_type) +table(str_df_short$effect_sign) +str(str_df_short) + +# assign pe outcome +str_df_short$pe_outcome = ifelse(str_df_short$effect_sign<0, "DD", "SS") +table(str_df_short$pe_outcome ) +table(str_df_short$effect_sign) + +#============== +# group effect type: +# lig, ppi2, nuc. acid, stability +#============== +affcols = c("affinity_scaled", "mmcsm_lig_scaled") + +#lig +table(str_df_short$effect_type) +str_df_short$effect_grouped = ifelse(str_df_short$effect_type%in%affcols + , "lig" + , str_df_short$effect_type) +table(str_df_short$effect_grouped) + + +#stability +str_df_short$effect_grouped = ifelse(!str_df_short$effect_grouped%in%c("lig") + , "stability" + , str_df_short$effect_grouped) + +table(str_df_short$effect_grouped) + +# create a sign as well +str_df_short$pe_effect_outcome = paste0(str_df_short$pe_outcome, "_" + , str_df_short$effect_grouped) + +table(str_df_short$pe_effect_outcome) + +##################################################################### +# Chimera: for colouring +#################################################################### + +#------------------------------------- +# get df with unique position +#-------------------------------------- +#data[!duplicated(data$x), ] +str_df_plot = str_df_short[!duplicated(str_df[[pos_colname]]),] + +if (nrow(str_df_plot) == length(unique(str_df[[pos_colname]]))){ + cat("\nPASS: successfully extracted df with unique positions") +}else{ + stop("\nAbort: Could not extract df with unique positions") +} + +#------------------------------------- +# generate colours for effect types +#-------------------------------------- +str_df_plot_cols = str_df_plot[, c(pos_colname, + "sensitivity", + "pe_outcome", + "effect_grouped", + "pe_effect_outcome")] +head(str_df_plot_cols) + +# colour intensity based on sign +#str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$effect_sign<0, "bright", "light") +str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$pe_outcome=="DD", "bright", "light") + +table(str_df_plot_cols$colour_hue); table(str_df_plot$pe_outcome) +head(str_df_plot_cols) + +# colour based on effect +table(str_df_plot_cols$pe_effect_outcome) + +pe_colour_map = c("DD_lig" = "#f0e68c" # khaki + , "SS_lig" = "#ffd700" # gold + + , "DD_nucleic_acid"= "#d2b48c" # sandybrown + , "SS_nucleic_acid"= "#a0522d" # sienna + + , "DD_ppi2" = "#da70d6" # orchid + , "SS_ppi2" = "#ff1493" # deeppink + + , "DD_stability" = "#f8766d" # red + , "SS_stability" = "#00BFC4") # blue + +#unlist(d[c('a', 'a', 'c', 'b')], use.names=FALSE) + +#map the colours +str_df_plot_cols$colour_map= unlist(map(str_df_plot_cols$pe_effect_outcome + ,function(x){pe_colour_map[[x]]} + )) +head(str_df_plot_cols$colour_map) +table(str_df_plot_cols$colour_map) +table(str_df_plot_cols$pe_effect_outcome) + +# str_df_plot_cols$colours = paste0(str_df_plot_cols$colour_hue +# , "_" +# , str_df_plot_cols$colour_map) +# head(str_df_plot_cols$colours) +# table(str_df_plot_cols$colours) +# +# +# class(str_df_plot_cols$colour_map) +# str(str_df_plot_cols) + +# sort by colour +head(str_df_plot_cols) +str_df_plot_cols = str_df_plot_cols[order(str_df_plot_cols$colour_map), ] +head(str_df_plot_cols) + +#====================================== +# write file with prominent effects +#====================================== +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +write.csv(str_df_plot_cols, paste0(outdir_images, tolower(gene), "_prominent_effects.csv")) + +################################ +# printing for chimera +############################### +chain_suffix = ".A" +str_df_plot_cols$pos_chain = paste0(str_df_plot_cols[[pos_colname]], chain_suffix) +table(str_df_plot_cols$colour_map) +table(str_df_plot_cols$pe_effect_outcome) + +#=================================================== +#------------------- +# Ligand Affinity +#------------------- +# -ve Lig Aff +dd_lig = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_lig",] +if (nrow(dd_lig) == table(str_df_plot_cols$pe_effect_outcome)[['DD_lig']]){ + dd_lig_pos = dd_lig[[pos_colname]] +}else{ + stop("Abort: DD affinity colour numbers mismtatch") +} +toString(paste0(dd_lig_pos, chain_suffix)) + +# +ve Lig Aff +ss_lig = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_lig",] +if (!empty(ss_lig)){ + if (nrow(ss_lig) == table(str_df_plot_cols$pe_effect_outcome)[['SS_lig']]){ + ss_lig_pos = ss_lig[[pos_colname]] + }else{ + stop("Abort: SS affinity colour numbers mismtatch") + } + #put in chimera cmd + toString(paste0(ss_lig_pos, chain_suffix)) +} + +#========================================================= +#------------------------ +# Stability +#------------------------ +# -ve Stability +dd_stability = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="DD_stability",] +if (nrow(dd_stability) == table(str_df_plot_cols$pe_effect_outcome)[['DD_stability']]){ + dd_stability_pos = dd_stability[[pos_colname]] +}else{ + stop("Abort: DD Stability colour numbers mismtatch") +} + +# +ve Stability +ss_stability = str_df_plot_cols[str_df_plot_cols$pe_effect_outcome=="SS_stability",] +if (nrow(ss_stability) == table(str_df_plot_cols$pe_effect_outcome)[['SS_stability']]){ + ss_stability_pos = ss_stability[[pos_colname]] +}else{ + stop("Abort: SS Stability colour numbers mismtatch") +} + +#put in chimera cmd +toString(paste0(dd_stability_pos, chain_suffix)) +toString(paste0(ss_stability_pos, chain_suffix)) +#################################################################### + diff --git a/scripts/plotting/plotting_thesis/pnca/sensitivity_count_pnca.R b/scripts/plotting/plotting_thesis/pnca/sensitivity_count_pnca.R new file mode 100644 index 0000000..66caef2 --- /dev/null +++ b/scripts/plotting/plotting_thesis/pnca/sensitivity_count_pnca.R @@ -0,0 +1,65 @@ +#========================= +# Count Sensitivity +# Mutations and positions +#========================= +pos_colname_c ="position" + +sensP_df = merged_df3[,c("mutationinformation", + #"position", + pos_colname_c, + "sensitivity")] + +head(sensP_df) +table(sensP_df$sensitivity) + +#--------------- +# Total unique positions +#---------------- +tot_mut_pos = length(unique(sensP_df[[pos_colname_c]])) +cat("\nNo of Tot muts sites:", tot_mut_pos) + +# resistant mut pos +sens_site_allR = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="R"] +sens_site_UR = unique(sens_site_allR) +length(sens_site_UR) + +# Sensitive mut pos +sens_site_allS = sensP_df[[pos_colname_c]][sensP_df$sensitivity=="S"] +sens_site_US = unique(sens_site_allS) +length(sens_site_UR) + +#--------------- +# Common Sites +#---------------- +common_pos = intersect(sens_site_UR,sens_site_US) +site_Cc = length(common_pos) +cat("\nNo of Common sites:", site_Cc + , "\nThese are:", common_pos) + +#--------------- +# Resistant muts +#---------------- +site_R = sens_site_UR[!sens_site_UR%in%common_pos] +site_Rc = length(site_R) + +if ( length(sens_site_allR) == table(sensP_df$sensitivity)[['R']] ){ + cat("\nNo of R muts:", length(sens_site_allR) + , "\nNo. of R sites:",site_Rc + , "\nThese are:", site_R +) +} + +#--------------- +# Sensitive muts +#---------------- +site_S = sens_site_US[!sens_site_US%in%common_pos] +site_Sc = length(site_S) + +if ( length(sens_site_allS) == table(sensP_df$sensitivity)[['S']] ){ + cat("\nNo of S muts:", length(sens_site_allS) + , "\nNo. of S sites:", site_Sc + , "\nThese are:", site_S) +} + +######################### +