#!/usr/bin/env Rscript
#################################################################
# TASK: Script to add bp colours for the barplot heatmap
#################################################################

# Working copy of the merged data used for the sub-colour data below.
# merged_df3 is not defined in the visible part of this script, so it is
# expected to exist already when this line runs.
my_df <- merged_df3

# Columns carried over from my_df into the sub-colour data frame.
# NOTE: "deepddg" and "deepddg_outcome" are not available for pnca;
# comment them out for that gene, otherwise the column selection on my_df
# fails.
cols_to_select <- c(
  "mutationinformation", "drtype",
  "wild_type",
  "position",
  "mutant_type",
  "chain", "ligand_id", "ligand_distance",
  "duet_stability_change", "duet_outcome", "duet_scaled",
  "ligand_affinity_change", "ligand_outcome", "affinity_scaled",
  "ddg_foldx", "foldx_scaled", "foldx_outcome",
  "deepddg", "deepddg_outcome", # comment out as not available for pnca
  "asa", "rsa", "rd_values", "kd_values",
  "af", "or_mychisq", "pval_fisher",
  "or_fisher", "or_logistic", "pval_logistic",
  "wt_prop_water", "mut_prop_water", "wt_prop_polarity", "mut_prop_polarity",
  "wt_calcprop", "mut_calcprop"
)

#=======================
# Data for sub colours
# barplot: PS
#=======================

cat("\nNo. of cols to select:", length(cols_to_select))

# Fail early and name the offending columns: a plain `[` selection only
# reports "undefined columns selected" without saying which ones.
missing_cols <- setdiff(cols_to_select, colnames(my_df))
if (length(missing_cols) > 0) {
  stop(
    "Columns missing from my_df: ", paste(missing_cols, collapse = ", "),
    call. = FALSE
  )
}

# NOTE(review): this selection assumes my_df is a plain data.frame (or
# tibble); confirm it is not already a data.table, where `[, var]` has
# different semantics.
subcols_df_ps <- my_df[, cols_to_select]

cat("\nNo of unique positions for ps:",
    length(unique(subcols_df_ps$position)))

# Add pos_count: the number of rows (nsSNPs, per the original note) sharing
# each position. setDT() converts subcols_df_ps to a data.table by reference
# and `:=` adds the column in place.
setDT(subcols_df_ps)[, pos_count := .N, by = .(position)]

# duet_outcome should be a factor: convert it when it is not.
if (is.factor(subcols_df_ps$duet_outcome)) {
  cat("\nDuet_outcome is factor")
} else {
  cat("\nConverting duet_outcome to factor")
  subcols_df_ps$duet_outcome <- as.factor(subcols_df_ps$duet_outcome)
}

# Show the per-level counts. print() is explicit so the table is also shown
# when the script is run through source(), which does not auto-print.
print(table(subcols_df_ps$duet_outcome))

# Sanity checks on duet_scaled: min and max should be -1 and 1.
# Results are printed explicitly so they are visible under source() too.
print(min(subcols_df_ps$duet_scaled))
print(max(subcols_df_ps$duet_scaled))

# NOTE(review): reads the note above as "values lie within [-1, 1]";
# only warns (does not stop) when that is violated. NAs are ignored here.
if (any(abs(subcols_df_ps$duet_scaled) > 1, na.rm = TRUE)) {
  warning("duet_scaled has values outside [-1, 1]", call. = FALSE)
}

# Min and max of duet_scaled within each duet_outcome level
print(tapply(subcols_df_ps$duet_scaled, subcols_df_ps$duet_outcome, min))
print(tapply(subcols_df_ps$duet_scaled, subcols_df_ps$duet_outcome, max))

# check unique values in normalised data
cat("\nNo. of unique values in duet scaled, no rounding:",
    length(unique(subcols_df_ps$duet_scaled)))

# No rounding: grouping values are the raw scaled scores
my_grp <- subcols_df_ps$duet_scaled
print(length(my_grp))

# Rounded alternative, if rounding is to be used
n <- 3 # number of decimal places
subcols_df_ps$duet_scaledR <- round(subcols_df_ps$duet_scaled, n)

cat("\nNo. of unique values in duet scaled", n, "places rounding:",
    length(unique(subcols_df_ps$duet_scaledR)))

my_grp_r <- subcols_df_ps$duet_scaledR # rounding

# Add grp cols of the form "<duet_outcome>_<scaled value>".
# paste0() has no `sep` argument (a `sep = ""` there is just pasted on as an
# extra empty string), so it is omitted; the resulting strings are unchanged.
subcols_df_ps$group <- paste0(subcols_df_ps$duet_outcome, "_", my_grp)
subcols_df_ps$groupR <- paste0(subcols_df_ps$duet_outcome, "_", my_grp_r)

# Call the function to create the palettes (unrounded and rounded values).
# NOTE(review): "my_grp" / "my_grp_r" are not columns of subcols_df_ps (the
# matching columns are duet_scaled / duet_scaledR); presumably
# ColourPalleteMulti resolves these names from the calling environment.
# Confirm against its definition before renaming my_grp or my_grp_r.
subcols_ps <- ColourPalleteMulti(subcols_df_ps, "duet_outcome", "my_grp")
subcolsR_ps <- ColourPalleteMulti(subcols_df_ps, "duet_outcome", "my_grp_r")

# Leading "\n" matches the other status messages so the lines do not run
# together on the console.
cat("\nColour palette generated for my_grp: ", length(subcols_ps), " colours")
cat("\nColour palette generated for my_grp_r: ", length(subcolsR_ps), " colours\n")