diff --git a/scripts/plotting/plotting_thesis/prominent_effects.R b/scripts/plotting/plotting_thesis/prominent_effects.R new file mode 100644 index 0000000..a69d092 --- /dev/null +++ b/scripts/plotting/plotting_thesis/prominent_effects.R @@ -0,0 +1,346 @@ +#!/usr/bin/env Rscript +#source("~/git/LSHTM_analysis/config/alr.R") +source("~/git/LSHTM_analysis/config/embb.R") +#source("~/git/LSHTM_analysis/config/katg.R") +#source("~/git/LSHTM_analysis/config/gid.R") +#source("~/git/LSHTM_analysis/config/pnca.R") +#source("~/git/LSHTM_analysis/config/rpob.R") + +# get plotting dfs +source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") + +source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") + +length(all_stability_cols); length(raw_stability_cols) +length(scaled_stability_cols); length(outcome_stability_cols) +length(affinity_dist_colnames) + +static_cols = c("mutationinformation", "position", "sensitivity") +other_cols_all = c(scaled_stability_cols, scaled_affinity_cols, affinity_dist_colnames) + +#omit avg cols and foldx_scaled_signC cols +other_cols = other_cols_all[grep("avg", other_cols_all, invert = T)] +other_cols = other_cols[grep("foldx_scaled_signC",other_cols, invert = T )] +other_cols + +cols_to_extract = c(static_cols, other_cols) +expected_ncols = length(static_cols) + length(other_cols) + +str_df = merged_df3[, cols_to_extract] + +if (ncol(str_df) == expected_ncols){ + cat("\nPASS: successfully extracted cols for calculating prominent effects") +}else{ + stop("\nAbort: Could not extract cols for calculating prominent effects") +} + +#========================= +# Masking affinity columns +#========================= +# First make values for affinity cols 0 when their corresponding dist >10 +head(str_df) + +# replace in place affinity values >10 +str_df[str_df["ligand_distance"]>10,"affinity_scaled"]=0 +str_df[str_df["ligand_distance"]>10,"mmcsm_lig_scaled"]=0 + +#ppi2 gene: replace in place ppi2 affinity values where ppi2 dist >10 + +if (tolower(gene)%in%geneL_ppi2){ + str_df[str_df["interface_dist"]>10,"mcsm_ppi2_scaled"]=0 +} + +# na gene: replace in place na affinity values where na dist >10 +if (tolower(gene)%in%geneL_na){ + str_df[str_df["XXXX"]>10,"mcsm_na_scaled"]=0 +} + +colnames(str_df) + +head(str_df) + +# get names of cols to calculate the prominent effects from +scaled_cols_tc = c("duet_scaled", "deepddg_scaled" + , "ddg_dynamut2_scaled", "foldx_scaled","affinity_scaled" + , "mmcsm_lig_scaled" , "mcsm_ppi2_scaled") + +#-------------------------------- +#get rowmax for absolute values +#-------------------------------- +#str_df$row_maximum = apply(str_df[,-1], 1, function(x){max(abs(x))}) +#str_df$row_maximum = apply(str_df[,scaled_cols_tc], 1, function(x){max(abs(x))}) + +#correct +#BOO= abs(str_df[,scaled_cols_tc]) == str_df[,'row_maximum']; head(BOO) +#also corr but weird +#POO = which(abs(str_df[,scaled_cols_tc]) == str_df[,'row_maximum'], arr.ind =T); head(POO) + +################################################ +# #------------- +# # short df: try +# #------------- +# df2_short = str_df[str_df$position%in%c(167, 423, 427),] +# df2_short = str_df[str_df$position%in%c(170, 167, 493, 453, 435, 433, 480, 456, 445),] +# df2_short = str_df[str_df$position%in%c(435, 480),] +# +# +# give_col=function(x,y,df=df2_short){ +# df[df$position==x,y] +# } +# +# for (i in unique(df2_short$position) ){ +# print(i) +# #print(paste0("\nNo. of unique positions:", length(unique(df2$position))) ) +# #cat(length(unique(df2$position))) +# #df2_short[df2_short$position==i,scaled_cols_tc] +# +# biggest = max(abs(give_col(i,scaled_cols_tc))) +# +# df2_short[df2_short$position==i,'abs_max_effect'] = biggest +# df2_short[df2_short$position==i,'effect_type']= names( +# give_col(i,scaled_cols_tc)[which( +# abs( +# give_col(i,scaled_cols_tc) +# ) == biggest, arr.ind=T +# )[, "col"]]) +# +# effect_name = df2_short[df2_short$position==i,'effect_type'][1] # pick first one in case we have multiple exact values +# +# # get index/rowname for value of max effect, and then use it to get the original sign +# # here +# #df2_short[df2_short$position==i,c(effect_name)] +# #which(abs(df2_short[df2_short$position==i,c('position',effect_name)][effect_name])==biggest, arr.ind=T) +# ind = rownames(which(abs(df2_short[df2_short$position==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T)) +# df2_short[df2_short$position==i,'effect_sign'] = sign(df2_short[effect_name][ind,]) +# } +# +# # ends with suffix 2 if dups +# df2_short$effect_type = sub("\\.[0-9]+", "", df2_short$effect_type) # cull duplicate effect types that happen when there are exact duplicate values +# +# View(df2_short) + + + +#=============== +# whole df +#=============== +give_col=function(x,y,df=str_df){ + df[df$position==x,y] +} + +for (i in unique(str_df$position) ){ + print(i) + #cat(length(unique(str_df$position))) + + biggest = max(abs(give_col(i,scaled_cols_tc))) + + str_df[str_df$position==i,'abs_max_effect'] = biggest + str_df[str_df$position==i,'effect_type']= names( + give_col(i,scaled_cols_tc)[which( + abs( + give_col(i,scaled_cols_tc) + ) == biggest, arr.ind=T + )[, "col"]])[1] + + effect_name = unique(str_df[str_df$position==i,'effect_type'])#[1] # pick first one in case we have multiple exact values + + # get index/rowname for value of max effect, and then use it to get the original sign + # here + ind = rownames(which(abs(str_df[str_df$position==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T)) + str_df[str_df$position==i,'effect_sign'] = sign(str_df[effect_name][ind,])[1] +} + +# ends with suffix 2 if dups +str_df$effect_type = sub("\\.[0-9]+", "", str_df$effect_type) # cull duplicate effect types that happen when there are exact duplicate values + + +# check +str_df_check = str_df[str_df$position%in%c(24, 32,160, 303, 334 ),] +table(str_df$effect_type) + +#------------------------------------- +# get df with uniqye position +#-------------------------------------- +#data[!duplicated(data$x), ] +str_df_plot = str_df[!duplicated(str_df$position),] + +if (nrow(str_df_plot) == length(unique(str_df$position))){ + cat("\nPASS: successfully extracted df with unique positions") +}else{ + stop("\nAbort: Could not extract df with unique positions") +} + +#------------------------------------- +# generate colours for effect types +#-------------------------------------- +str_df_plot_cols = str_df_plot[, c("position", "sensitivity" + , affinity_dist_colnames + , "abs_max_effect" + , "effect_type" + , "effect_sign")] +head(str_df_plot_cols) + +# colour intensity based on sign +str_df_plot_cols$colour_hue = ifelse(str_df_plot_cols$effect_sign<0, "bright", "light") +table(str_df_plot_cols$colour_hue) +head(str_df_plot_cols) +# colour based on effect +table(str_df_plot_cols$effect_type) + +pe_colour_map = c("affinity_scaled" = "salmon" + , "mmcsm_lig_scaled" = "salmon" + , "mcsm_ppi2_scaled" = "pink" + , "mcsm_na_scaled" = "orange" + , "duet_scaled" = "dimgray" + , "deepddg_scaled" = "dimgray" + , "ddg_dynamut2_scaled"= "dimgray" + , "foldx_scaled" = "dimgray") + +#unlist(d[c('a', 'a', 'c', 'b')], use.names=FALSE) + +#map the colours +str_df_plot_cols$colour_map= unlist(map(str_df_plot_cols$effect_type + ,function(x){pe_colour_map[[x]]} + )) + +str_df_plot_cols$colours = paste0(str_df_plot_cols$colour_hue + , "_" + , str_df_plot_cols$colour_map) +head(str_df_plot_cols$colours) +table(str_df_plot_cols$colours) + + +class(str_df_plot_cols$colour_map) +str(str_df_plot_cols) + +# sort by colour +head(str_df_plot_cols) +str_df_plot_cols = str_df_plot_cols[order(str_df_plot_cols$colour_map), ] +head(str_df_plot_cols) + +#====================================== +# write file with prominent effects +#====================================== +outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") +write.csv(str_df_plot_cols, paste0(outdir_images, tolower(gene), "_prominent_effects.csv")) + +################################ +# printing for chimera +############################### +str_df_plot_cols$pos_chain = paste0(str_df_plot_cols$position, ".B,") +table(str_df_plot_cols$colour_map) + +#=================================================== +#------------------- +# Ligand Affinity +#------------------- +foo = str_df_plot_cols[str_df_plot_cols$colours=="light_salmon",] +all(foo2$effect_sign == 1) + +foo1 = str_df_plot_cols[str_df_plot_cols$colours=="bright_salmon",] +all(foo3$effect_sign == -1) + +#light salmon: stabilising affinity +table(str_df_plot_cols$colours) + +affinity_pos_l = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="light_salmon"] +affinity_pos_lc = paste(affinity_pos_l, collapse = "") +affinity_pos_lc +table(str_df_plot_cols$colours)[["light_salmon"]] + +#bright salmon: DEstabilsing affinity +affinity_pos_b = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="bright_salmon"] +affinity_pos_bc = paste(affinity_pos_b, collapse = "") +affinity_pos_bc +table(str_df_plot_cols$colours)[["bright_salmon"]] + +c1 = length(affinity_pos_l) + length(affinity_pos_b) == table(str_df_plot_cols$colour_map)[["salmon"]] + +if (c1){ + cat("PASS: affinity colour numbers checked") +}else{ + stop("Abort: affinity colour numbers mismtatch") +} + +#put in chimera cmd +affinity_pos_lc +affinity_pos_bc + +#=================================================== +#------------------- +# ppi2 Affinity +#------------------- +foo2 = str_df_plot_cols[str_df_plot_cols$colours=="light_pink",] +all(foo2$effect_sign == 1) + +foo3 = str_df_plot_cols[str_df_plot_cols$colours=="bright_pink",] +all(foo3$effect_sign == -1) + +#light_pink: stabilising affinity +table(str_df_plot_cols$colours) + +ppi2_pos_l = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="light_pink"] +ppi2_pos_lc = paste(ppi2_pos_l, collapse = "") +ppi2_pos_lc +table(str_df_plot_cols$colours)[["light_pink"]] + +#bright pink: DEstabilsing affinity +ppi2_pos_b = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="bright_pink"] +ppi2_pos_bc = paste(ppi2_pos_b, collapse = "") +ppi2_pos_bc +table(str_df_plot_cols$colours)[["bright_pink"]] + +c2 = length(ppi2_pos_l) + length(ppi2_pos_b) == table(str_df_plot_cols$colour_map)[["pink"]] + +if (c2){ + cat("PASS: ppi2 colour numbers checked") +}else{ + stop("Abort: ppi2 colour numbers mismtatch") +} + +#put in chimera cmd +ppi2_pos_lc +ppi2_pos_bc + +#========================================================= +#------------------- +# Stability +#------------------- +foo4 = str_df_plot_cols[str_df_plot_cols$colours=="light_dimgray",] +all(foo2$effect_sign == 1) + +foo5 = str_df_plot_cols[str_df_plot_cols$colours=="bright_dimgray",] +all(foo3$effect_sign == -1) + +#light_dimgray: stabilising Stability +table(str_df_plot_cols$colours) + +stab_pos_l = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="light_dimgray"] +stab_pos_lc = paste(stab_pos_l, collapse = "") +stab_pos_lc +table(str_df_plot_cols$colours)[["light_dimgray"]] + +#bright_dimgray pink: DEstabilsing Stability +stab_pos_b = str_df_plot_cols$pos_chain[str_df_plot_cols$colours=="bright_dimgray"] +stab_pos_bc = paste(stab_pos_b, collapse = "") +stab_pos_bc +table(str_df_plot_cols$colours)[["bright_dimgray"]] + +c3 = length(stab_pos_l) + length(stab_pos_b) == table(str_df_plot_cols$colour_map)[["dimgray"]] + +if (c3){ + cat("PASS: stability colour numbers checked") +}else{ + stop("Abort: stability colour numbers mismtatch") +} + +#put in chimera cmd +stab_pos_lc +stab_pos_bc + + +# stab tool count +table(str_df_plot_cols$effect_type) + +table(str_df_plot_cols$effect_type, str_df_plot_cols$effect_sign) +