added plotting/other_plots_data.R
This commit is contained in:
parent
1262df40c9
commit
ecbc7541e9
3 changed files with 277 additions and 7 deletions
|
@ -76,7 +76,7 @@ print('Input file: ', infile_merged_df3
|
|||
#=======
|
||||
# output
|
||||
#=======
|
||||
out_filename = 'mut_elec_changes.txt'
|
||||
out_filename = gene.lower() + '_mut_elec_changes.txt'
|
||||
outfile_elec_changes = outdir + '/' + out_filename
|
||||
print('Output file: ', outfile_elec_changes
|
||||
, '\n============================================================')
|
||||
|
|
|
@ -35,21 +35,36 @@ write.csv(merged_df3_short, "merged_df3_short.csv")
|
|||
#%%%%%%%%%%%%%%%%%%%%
|
||||
df_ps = merged_df3
|
||||
|
||||
# adding folds scaled values
|
||||
n = which(colnames(df_ps) == "ddg"); n # 10
|
||||
#============================
|
||||
# adding foldx scaled values
|
||||
# scale data b/w -1 and 1
|
||||
#============================
|
||||
n = which(colnames(df_ps) == "ddg"); n
|
||||
|
||||
my_min = min(df_ps[,n]); my_min
|
||||
my_max = max(df_ps[,n]); my_max
|
||||
|
||||
df_ps$foldx_scaled = ifelse(df_ps[,n] < 0
|
||||
, df_ps[,n]/abs(my_min)
|
||||
, df_ps[,n]/my_max) #335, 15
|
||||
, df_ps[,n]/my_max)
|
||||
# sanity check
|
||||
my_min = min(df_ps$foldx_scaled); my_min
|
||||
my_max = max(df_ps$foldx_scaled); my_max
|
||||
|
||||
# assign foldx
|
||||
#ddg<0 = "Stabilising" (-ve)
|
||||
if (my_min == -1 && my_max == 1){
|
||||
cat("PASS: foldx ddg successfully scaled b/w -1 and 1"
|
||||
, "\nProceeding with assigning foldx outcome category")
|
||||
}else{
|
||||
cat("FAIL: could not scale foldx ddg values"
|
||||
, "Aborting!")
|
||||
}
|
||||
|
||||
|
||||
#================================
|
||||
# adding foldx outcome category
|
||||
# ddg<0 = "Stabilising" (-ve)
|
||||
#=================================
|
||||
|
||||
c1 = table(df_ps$ddg < 0)
|
||||
df_ps$foldx_outcome = ifelse(df_ps$ddg < 0, "Stabilising", "Destabilising")
|
||||
c2 = table(df_ps$ddg < 0)
|
||||
|
|
255
scripts/plotting/output_tables.R
Normal file
255
scripts/plotting/output_tables.R
Normal file
|
@ -0,0 +1,255 @@
|
|||
#!/usr/bin/env Rscript
|
||||
#########################################################
|
||||
# TASK: producing boxplots for dr and other muts
|
||||
|
||||
#########################################################
|
||||
#=======================================================================
|
||||
# working dir and loading libraries
|
||||
getwd()
|
||||
setwd("~/git/LSHTM_analysis/scripts/plotting")
|
||||
getwd()
|
||||
|
||||
#source("Header_TT.R")
|
||||
library(ggplot2)
|
||||
library(data.table)
|
||||
library(dplyr)
|
||||
|
||||
#=========
|
||||
# Input
|
||||
#=========
|
||||
source("combining_dfs_plotting.R")
|
||||
|
||||
rm(merged_df2, merged_df2_comp, merged_df2_lig, merged_df2_comp_lig
|
||||
, merged_df3_comp, merged_df3_comp_lig, merged_df3_lig
|
||||
, my_df_u, my_df_u_lig)
|
||||
|
||||
|
||||
#=========
|
||||
# Output
|
||||
#=========
|
||||
mut_landscape_data = paste0(tolower(gene), "_merged_df3_short.csv")
|
||||
outfile_mut_landscape_data = paste0(outdir, "/" , mut_landscape_data)
|
||||
outfile_mut_landscape_data
|
||||
|
||||
all_muts_table = paste0(tolower(gene), "_table_all_muts.csv")
|
||||
outfile_all_muts_table = paste0(outdir, "/" , all_muts_table)
|
||||
outfile_all_muts_table
|
||||
|
||||
#########################################################################
|
||||
|
||||
# {RESULT PS: count of dr and other muts}
|
||||
table(merged_df3$mutation_info)
|
||||
|
||||
# sanity checks: Ensure dr and other muts are indeed unique
|
||||
dr_muts = merged_df3[merged_df3$mutation_info == dr_muts_col,]
|
||||
dr_muts_names = unique(dr_muts$mutation)
|
||||
|
||||
other_muts = merged_df3[merged_df3$mutation_info == other_muts_col,]
|
||||
other_muts_names = unique(other_muts$mutation)
|
||||
|
||||
if (table(dr_muts_names%in%other_muts_names)[[1]] == length(dr_muts_names) &&
|
||||
table(other_muts_names%in%dr_muts_names)[[1]] == length(other_muts_names)){
|
||||
cat("PASS: dr and other muts are indeed unique")
|
||||
}else{
|
||||
cat("FAIL: dr and others muts are NOT unique!")
|
||||
quit()
|
||||
}
|
||||
|
||||
# {RESULT LIG: count of dr and other muts}
|
||||
table(merged_df3$mutation_info[merged_df3$ligand_distance<10])
|
||||
|
||||
#===========
|
||||
# Outfile 1: mut_landscape_data
|
||||
#===========
|
||||
# select columns
|
||||
cols_mut_landscape = c("mutation", "mutationinformation"
|
||||
, "wild_type", "position", "mutant_type"
|
||||
, "mutation_info")
|
||||
|
||||
merged_df3_short = merged_df3[,cols_mut_landscape]
|
||||
|
||||
#-----------
|
||||
# write file 1
|
||||
#-----------
|
||||
|
||||
#write.csv(merged_df3_short, outfile_mut_landscape_data)
|
||||
|
||||
cat("Mutational landscape csv written:", outfile_mut_landscape_data
|
||||
, "\nnrows:", nrow(merged_df3_short)
|
||||
, "\nncols:", ncol(merged_df3_short))
|
||||
|
||||
|
||||
#########################################################################
|
||||
|
||||
#===========
|
||||
# Outfile 2: all_muts_table
|
||||
#===========
|
||||
|
||||
#%%%%%%%%%%%%%%%%%
|
||||
# REASSIGNMENT
|
||||
df = merged_df3
|
||||
#%%%%%%%%%%%%%%%%%%
|
||||
|
||||
#============================
|
||||
# adding foldx scaled values
|
||||
# scale data b/w -1 and 1
|
||||
#============================
|
||||
n = which(colnames(df) == "ddg"); n
|
||||
|
||||
my_min = min(df[,n]); my_min
|
||||
my_max = max(df[,n]); my_max
|
||||
|
||||
df$foldx_scaled = ifelse(df[,n] < 0
|
||||
, df[,n]/abs(my_min)
|
||||
, df[,n]/my_max)
|
||||
# sanity check
|
||||
my_min = min(df$foldx_scaled); my_min
|
||||
my_max = max(df$foldx_scaled); my_max
|
||||
|
||||
if (my_min == -1 && my_max == 1){
|
||||
cat("PASS: foldx ddg successfully scaled b/w -1 and 1"
|
||||
, "\nProceeding with assigning foldx outcome category")
|
||||
}else{
|
||||
cat("FAIL: could not scale foldx ddg values"
|
||||
, "Aborting!")
|
||||
}
|
||||
|
||||
#================================
|
||||
# adding foldx outcome category
|
||||
# ddg<0 = "Stabilising" (-ve)
|
||||
#=================================
|
||||
c1 = table(df$ddg < 0)
|
||||
df$foldx_outcome = ifelse(df$ddg < 0, "Stabilising", "Destabilising")
|
||||
c2 = table(df$ddg < 0)
|
||||
|
||||
if ( all(c1 == c2) ){
|
||||
cat("PASS: foldx outcome successfully created")
|
||||
}else{
|
||||
cat("FAIL: foldx outcome could not be created. Aborting!")
|
||||
exit()
|
||||
}
|
||||
|
||||
#=================================
|
||||
# change labels of mut_info column
|
||||
#=================================
|
||||
df$mutation_info = as.factor(df$mutation_info)
|
||||
levels(df$mutation_info);table(df$mutation_info)
|
||||
|
||||
levels(df$mutation_info)[levels(df$mutation_info) == dr_muts_col] <- "drug associated"
|
||||
levels(df$mutation_info)[levels(df$mutation_info) == other_muts_col] <- "others"
|
||||
|
||||
levels(df$mutation_info);table(df$mutation_info)
|
||||
|
||||
#================================
|
||||
# adding negative log p values and % af
|
||||
# pval_fisher AND pwald_kin
|
||||
#=================================
|
||||
df$af_percent = df$af*100
|
||||
df$neglog10_pval_fisher = -log10(df$pval_fisher)
|
||||
df$neglog10_pwald_kin = -log10(df$pwald_kin)
|
||||
|
||||
# Order df: by position
|
||||
df$position; head(df$mutationinformation)
|
||||
df_ordered = df[order(df$position),]
|
||||
df_ordered$position;head(df_ordered$mutationinformation)
|
||||
|
||||
# select cols in desired order
|
||||
cols_all_muts_table = c("mutationinformation"
|
||||
, "mutation_info"
|
||||
#, "af"
|
||||
, "af_percent"
|
||||
, "or_mychisq"
|
||||
#, "neglog10_pval_fisher"
|
||||
, "pval_fisher"
|
||||
, "or_kin"
|
||||
#, "neglog10_pwald_kin"
|
||||
, "pwald_kin"
|
||||
, "duet_stability_change"
|
||||
, "duet_outcome"
|
||||
, "ligand_distance"
|
||||
, "ligand_affinity_change"
|
||||
, "ligand_outcome"
|
||||
, "ddg"
|
||||
, "foldx_outcome"
|
||||
, "asa"
|
||||
, "rsa"
|
||||
, "kd_values"
|
||||
, "rd_values")
|
||||
|
||||
cat("\nSelecting", length(cols_all_muts_table), "columns for output"
|
||||
, "\nThese are ordered by position")
|
||||
|
||||
if(all(cols_all_muts_table%in% colnames(df_ordered))){
|
||||
cat("PASS: columns selected are present"
|
||||
, "\nProceeding to subset")
|
||||
}else{
|
||||
cat("\nFAIL: columns selected are not present in original df"
|
||||
, "Aborting!")
|
||||
quit()
|
||||
}
|
||||
|
||||
df_output = df_ordered[,cols_all_muts_table]
|
||||
|
||||
# Assign pretty column names
|
||||
#angstroms_symbol = "\u212b" # already imported but just in case
|
||||
delta_symbol = "\u0394"; delta_symbol
|
||||
|
||||
ligand_dist_colname = paste0("Distance to ligand (", angstroms_symbol, ")")
|
||||
ligand_dist_colname
|
||||
|
||||
foldx_colname = paste0("Foldx ", delta_symbol, delta_symbol, "G" )
|
||||
foldx_colname
|
||||
|
||||
#duet_colname = "DUET (Kcal/Mol)"
|
||||
duet_colname = paste0("DUET ", delta_symbol, delta_symbol, "G" )
|
||||
duet_colname
|
||||
|
||||
colnames(df_output)
|
||||
my_pretty_colnames = c("Mutation"
|
||||
, "Mutation class"
|
||||
, "AF(%)"
|
||||
, "OR"
|
||||
, "P-value"
|
||||
, "OR adjusted"
|
||||
, "P-wald"
|
||||
, duet_colname
|
||||
, "DUET outcome"
|
||||
, ligand_dist_colname
|
||||
, "mCSM-lig (log affinity)"
|
||||
, "Ligand outcome"
|
||||
, foldx_colname
|
||||
, "Foldx outcome"
|
||||
, "ASA"
|
||||
, "RSA"
|
||||
, "Hydrophobicity"
|
||||
, "Residue depth")
|
||||
|
||||
# MANUAL checkpoint...
|
||||
check_colnames = as.data.frame(cbind(colnames(df_output), my_pretty_colnames))
|
||||
|
||||
if (length(my_pretty_colnames) == length(df_output)){
|
||||
cat("PASS: pretty colnames generated for output columns"
|
||||
, "\nAssigning pretty colnames to output df")
|
||||
colnames(df_output) = my_pretty_colnames
|
||||
}else{
|
||||
cat("FAIL:length of colnames mismatch"
|
||||
, "Expected length:", length(df_output)
|
||||
, "\nGot:", length(my_pretty_colnames))
|
||||
}
|
||||
|
||||
colnames(df_output)
|
||||
|
||||
#-----------
|
||||
# write file 2
|
||||
#-----------
|
||||
|
||||
#write.csv(df_output, outfile_all_muts_table, row.names = FALSE)
|
||||
|
||||
cat("\nOutput table (csv) written:", outfile_all_muts_table
|
||||
, "\nnrows:", nrow(df_output)
|
||||
, "\nncols:", ncol(df_output))
|
||||
|
||||
############################################################################
|
||||
|
||||
# clear excess variables
|
||||
|
Loading…
Add table
Add a link
Reference in a new issue