moved coloured_bp_data.R to redundant in light of updated function and reflected this in notes within get_plotting_dfs.R

This commit is contained in:
Tanushree Tunstall 2021-09-15 19:42:08 +01:00
parent 96e6e8db5d
commit 1d16c6848e
2 changed files with 5 additions and 0 deletions

View file

@ -0,0 +1,80 @@
#!/usr/bin/env Rscript
#################################################################
# TASK: Script to add bp colours ~ barplot heatmap
#################################################################
# Working copy of the merged data. merged_df3 is not created in the visible
# part of this script, so it has to exist before this point is reached.
my_df <- merged_df3

# Columns carried over into the sub-colour data frame (order is preserved).
cols_to_select <- c(
  "mutationinformation", "drtype",
  "wild_type",
  "position",
  "mutant_type",
  "chain", "ligand_id", "ligand_distance",
  "duet_stability_change", "duet_outcome", "duet_scaled",
  "ligand_affinity_change", "ligand_outcome", "affinity_scaled",
  "ddg_foldx", "foldx_scaled", "foldx_outcome",
  "deepddg", "deepddg_outcome", # comment out as not available for pnca
  "asa", "rsa", "rd_values", "kd_values",
  "af", "or_mychisq", "pval_fisher",
  "or_fisher", "or_logistic", "pval_logistic",
  "wt_prop_water", "mut_prop_water", "wt_prop_polarity", "mut_prop_polarity",
  "wt_calcprop", "mut_calcprop"
)
#=======================
# Data for sub colours
# barplot: PS
#=======================
cat("\nNo. of cols to select:", length(cols_to_select))

# Restrict the working data to the selected columns only
subcols_df_ps <- my_df[, cols_to_select]

cat(
  "\nNo of unique positions for ps:",
  length(unique(subcols_df_ps$position))
)

# Add the pos_count column: the number of rows (nsSNPs) sharing a position.
# setDT() turns subcols_df_ps into a data.table by reference, which is what
# allows the := assignment that follows.
setDT(subcols_df_ps)
subcols_df_ps[, pos_count := .N, by = .(position)]
# duet_outcome should be a factor: coerce it when it is not, report which
# case applied, then print the counts per level either way.
if (!is.factor(subcols_df_ps$duet_outcome)) {
  cat("\nConverting duet_outcome to factor")
  subcols_df_ps$duet_outcome <- as.factor(subcols_df_ps$duet_outcome)
} else {
  cat("\nDuet_outcome is factor")
}
table(subcols_df_ps$duet_outcome)
# Sanity check: the overall extremes of duet_scaled should be -1 and 1
min(subcols_df_ps$duet_scaled)
max(subcols_df_ps$duet_scaled)

# Same extremes, broken down by duet_outcome level
with(subcols_df_ps, tapply(duet_scaled, duet_outcome, min))
with(subcols_df_ps, tapply(duet_scaled, duet_outcome, max))

# check unique values in normalised data
cat(
  "\nNo. of unique values in duet scaled, no rounding:",
  length(unique(subcols_df_ps$duet_scaled))
)

# Grouping values without rounding
my_grp <- subcols_df_ps$duet_scaled
length(my_grp)

# Grouping values with rounding, in case rounding is to be used
n <- 3
subcols_df_ps$duet_scaledR <- round(subcols_df_ps$duet_scaled, n)
cat(
  "\nNo. of unique values in duet scaled", n, "places rounding:",
  length(unique(subcols_df_ps$duet_scaledR))
)
my_grp_r <- subcols_df_ps$duet_scaledR # rounding
# Add grp cols: "<duet_outcome>_<duet_scaled>" labels, built once from the
# unrounded values (group) and once from the rounded values (groupR).
# paste0() takes no `sep` argument, so none is passed: a `sep = ""` would
# simply be pasted on as one more (empty) element.
subcols_df_ps$group <- paste0(subcols_df_ps$duet_outcome, "_", my_grp)
subcols_df_ps$groupR <- paste0(subcols_df_ps$duet_outcome, "_", my_grp_r)

# Call the function to create the palettes, one per grouping vector.
# NOTE(review): the third argument names the free-standing vectors my_grp and
# my_grp_r rather than a column of subcols_df_ps; presumably
# ColourPalleteMulti resolves these names itself -- confirm against its
# definition before changing them.
subcols_ps <- ColourPalleteMulti(subcols_df_ps, "duet_outcome", "my_grp")
subcolsR_ps <- ColourPalleteMulti(subcols_df_ps, "duet_outcome", "my_grp_r")

# Report palette sizes; the leading "\n" keeps each message on its own line,
# matching the other cat() calls in this script.
cat("\nColour palette generated for my_grp:", length(subcols_ps), "colours")
cat("\nColour palette generated for my_grp_r:", length(subcolsR_ps), "colours")