sorted subcols_axis script to generate correct axis cols for both PS and lig plots

This commit is contained in:
Tanushree Tunstall 2020-08-26 16:39:10 +01:00
parent 2e53c8007a
commit e0f14ed266
9 changed files with 117 additions and 81 deletions

47
scripts/plotting/barplots_subcolours_aa_PS.R Normal file → Executable file
View file

@ -1,3 +1,4 @@
#!/usr/bin/env Rscript
# Show the working directory, switch to the plotting scripts directory,
# then show it again so the change can be confirmed in the output.
# NOTE(review): the path is hard-coded and assumes the repository lives at
# ~/git/LSHTM_analysis -- confirm before running on another machine.
getwd()
setwd("~/git/LSHTM_analysis/scripts/plotting")
getwd()
@ -42,14 +43,30 @@ cat(paste0("Variables imported:"
, "\nAngstrom symbol:", angstroms_symbol))
# clear excess variable
# Drop objects that are not used further in this view.
# NOTE(review): both rm() calls below name `upos`. They look like the old and
# new versions of the same line; if both were executed, the second would warn
# that `upos` is not found -- confirm only one is present in the script.
rm(my_df, upos, dup_muts, my_df_u_lig)
rm(dup_muts_cols, mut_pos_cols_lig, my_df_cols, my_df_u_cols_lig, upos)
#=======================================================================
# !!! very important!!!!
#================
# Inspecting mut_pos_cols
# position numbers and colours
# position numbers and colours and assigning axis colours based on lab_fg
# of the correct df
# open file from desktop ("sample_axis_cols") for cross checking
#================
# very important!
#my_axis_colours = mut_pos_cols$lab_fg
# Assign the x-axis label colours from mut_pos_cols$lab_fg, but only when
# mut_pos_cols has exactly one row per unique position in my_df_u_cols.
# On a mismatch the script is aborted, because the colours could not be
# matched to the positions shown on the x-axis.
if ( nrow(mut_pos_cols) == length(unique(my_df_u_cols$position)) ){
  print("PASS: lengths checked, assigning axis colours")
  my_axis_colours = mut_pos_cols$lab_fg
  cat("length of axis colours:", length(my_axis_colours)
      , "\nwhich corresponds to the number of positions on the x-axis of the plot")
}else{
  print("FAIL:lengths mismatch, could not assign axis colours")
  # Exit with a non-zero status: quit() defaults to status 0, which would make
  # a failed check look like a successful run to a calling shell or pipeline.
  quit(status = 1)
}
# further sanity checks
# Frequency of each background label colour (printed when run at top level).
table(mut_pos_cols$lab_bg)
# table() drops NA by default, so the counts add up to nrow(mut_pos_cols)
# only when lab_bg has no missing values.
check_lab_bg = sum( table(mut_pos_cols$lab_bg) ) == nrow(mut_pos_cols) # should be True
check_lab_bg
@ -70,12 +87,6 @@ if (check_lab_bg && check_lab_bg2 && check_lab_fg) {
quit()
}
# very important!
# Unconditional assignment of the axis label colours from mut_pos_cols$lab_fg.
# NOTE(review): my_axis_colours is also assigned inside the length check
# earlier in this view; this line appears to be the superseded version --
# confirm it is no longer present in the script.
my_axis_colours = mut_pos_cols$lab_fg
# now clear mut_pos_cols
rm(mut_pos_cols)
#=======
# output
#=======
@ -89,13 +100,13 @@ plot_bp_aa_subcols_duet = paste0(plotdir, "/", bp_aa_subcols_duet)
# Data for plots
#================
# REASSIGNMENT as necessary
# As displayed, the second assignment overwrites the first, so df ends up as
# my_df_u_cols.
# NOTE(review): the two lines look like the old and new versions of one
# assignment -- confirm only `df = my_df_u_cols` remains in the script.
df = my_df_u
df = my_df_u_cols
# sanity checks
# Print the structure of df for visual inspection.
str(df)
###########################
# 2: Plot: DUET scores
# Plot: DUET scores
###########################
#==========================
@ -137,7 +148,7 @@ snp_count = sort(unique(snpsBYpos_df$snpsBYpos))
# Make sure df$duet_outcome is a factor: leave it untouched when it already
# is one, otherwise convert it with as.factor().
if (is.factor(df$duet_outcome)){
print("duet_outcome is factor")
}else{
# NOTE(review): the two print() lines below look like the old and new wording
# of the same message -- confirm only one is present in the script.
print("convert duet_outcome to factor")
print("converting duet_outcome to factor")
df$duet_outcome = as.factor(df$duet_outcome)
}
@ -165,25 +176,17 @@ tapply(df$duet_scaled, df$duet_outcome, max)
# Count the distinct values of the scaled DUET score.
u = unique(df$duet_scaled)
cat("No. of unique values in normalised data:", length(u))
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Run this section if rounding is to be used
# specify number for rounding
#n = 3
#df$duet_scaledR = round(df$duet_scaled, n)
#ur = unique(df$duet_scaledR)
# create an extra column called group which contains the "gp name and score"
# Define group
# Create an extra column called group which contains the "gp name and score"
# so colours can be generated for each unique values in this column
#my_grp = df$duet_scaledR # rounding
my_grp = df$duet_scaled # no rounding
#%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
# Build the group label as "<duet_outcome>_<score>" for every row.
# NOTE(review): paste0() has no `sep` argument, so sep = "" is pasted as one
# more (empty) string and does not change the result.
df$group <- paste0(df$duet_outcome, "_", my_grp, sep = "")
# Call the function to create the palette based on the group defined above
# NOTE(review): the third argument is the string "my_grp", which in this view
# is a standalone variable and not a column of df (the column added above is
# `group`) -- confirm ColourPalleteMulti resolves it as intended.
colours <- ColourPalleteMulti(df, "duet_outcome", "my_grp")
print(paste0("Colour palette generated for: ", length(colours), " colours"))
my_title = "Protein stability (DUET)"
# Report how many axis label colours are available for the plot.
cat("No. of axis colours: ", length(my_axis_colours))
#========================
# plot with axis colours