rectified mcsm_mean_stability to average on raw values and then scale
This commit is contained in:
parent
d76345c3de
commit
54f9fd073b
2 changed files with 94 additions and 59 deletions
|
@ -6,41 +6,29 @@ getwd()
|
||||||
# TASK:
|
# TASK:
|
||||||
|
|
||||||
#########################################################
|
#########################################################
|
||||||
source("Header_TT.R")
|
#source("Header_TT.R")
|
||||||
require(data.table)
|
#require(data.table)
|
||||||
require(dplyr)
|
#require(dplyr)
|
||||||
|
|
||||||
source("plotting_data.R")
|
source("plotting_data.R")
|
||||||
# should return
|
# should return
|
||||||
#my_df
|
#my_df
|
||||||
#my_df_u
|
#my_df_u
|
||||||
#dup_muts
|
#dup_muts
|
||||||
|
|
||||||
|
# cmd parse arguments
|
||||||
|
#require('getopt', quietly = TRUE)
|
||||||
|
#========================================================
|
||||||
|
|
||||||
|
|
||||||
#========================================================
|
#========================================================
|
||||||
# Read file: call script for combining df for PS
|
# Read file: call script for combining df for PS
|
||||||
|
|
||||||
#source("../combining_two_df.R")
|
#source("../combining_two_df.R")
|
||||||
|
|
||||||
#========================================================
|
#========================================================
|
||||||
#%% variable assignment: input and output paths & filenames
drug = "pyrazinamide"
gene = "pncA"
gene_match = paste0(gene,"_p.")   # prefix used to match mutation strings for this gene
cat(gene_match)

#=============
# directories
# (plotting_data.R imports all the dir names, etc)
#=============
datadir = paste0("~/git/Data")
indir = paste0(datadir, "/", drug, "/input")
# FIX: reuse datadir instead of repeating the literal "~/git/Data", so a
# single edit relocates every data directory (resulting string unchanged)
outdir = paste0(datadir, "/", drug, "/output")

#======
# input
#======
#in_filename = "mcsm_complex1_normalised.csv"
in_filename_params = paste0(tolower(gene), "_all_params.csv")
infile_params = paste0(outdir, "/", in_filename_params)
cat(paste0("Input file 1:", infile_params) )
|
|
||||||
|
|
||||||
#=======
|
#=======
|
||||||
# output
|
# output
|
||||||
|
@ -60,61 +48,108 @@ rm(my_df)
|
||||||
|
|
||||||
###########################
|
###########################
|
||||||
# Data for bfactor figure
|
# Data for bfactor figure
|
||||||
# PS average
|
# PS (duet) average
|
||||||
# Lig average
|
# Ligand affinity average
|
||||||
###########################
|
###########################
|
||||||
head(df$position); head(df$mutationinformation)
|
head(df$position); head(df$mutationinformation)
|
||||||
head(df$duet_scaled)
|
head(df$duet_stability_change)
|
||||||
|
|
||||||
# order data frame
|
# order data frame
|
||||||
#df = df[order(df$position),] #already done
|
#df = df[order(df$position),] #already done
|
||||||
|
#head(df$position); head(df$mutationinformation)
|
||||||
head(df$position); head(df$mutationinformation)
|
#head(df$duet_stability_change)
|
||||||
head(df$duet_scaled)
|
|
||||||
|
|
||||||
#***********
# PS(duet): average by position and then scale b/w -1 and 1
# column to average: duet_stability_change (NOT scaled!)
#***********
# Average the RAW duet values per residue position first; scaling is
# applied to the averages afterwards (rationale of this commit).
mean_duet_by_position <- df %>%
  group_by(position) %>%
  summarize(averaged_duet = mean(duet_stability_change))

# scale b/w -1 and 1: negatives are divided by |min|, positives by max,
# so the observed extremes map exactly onto -1 and 1
duet_min = min(mean_duet_by_position['averaged_duet'])
duet_max = max(mean_duet_by_position['averaged_duet'])

# FIX: vectorised ifelse() directly on the column instead of lapply()
# over a one-column data frame — same values, but avoids the
# list-wrapped intermediate and reads as a plain column transformation
mean_duet_by_position$averaged_duet_scaled = ifelse(mean_duet_by_position$averaged_duet < 0
                                                    , mean_duet_by_position$averaged_duet/abs(duet_min)
                                                    , mean_duet_by_position$averaged_duet/duet_max)

cat(paste0('Average duet scores:\n', head(mean_duet_by_position['averaged_duet'])
           , '\n---------------------------------------------------------------'
           , '\nScaled duet scores:\n', head(mean_duet_by_position['averaged_duet_scaled'])))

# sanity checks: after scaling, the bounds must be exactly -1 and 1
# (exact equality is safe here because min/|min| and max/max are exact in IEEE)
l_bound_duet = min(mean_duet_by_position['averaged_duet_scaled'])
u_bound_duet = max(mean_duet_by_position['averaged_duet_scaled'])

if ( (l_bound_duet == -1) && (u_bound_duet == 1) ){
  cat(paste0("PASS: duet scores averaged by position and then scaled"
             , "\nmin averaged duet: ", l_bound_duet
             , "\nmax averaged duet: ", u_bound_duet))
}else{
  # FIX: corrected typo "avergaed" in the failure message
  cat(paste0("FAIL: averaged duet scores could not be scaled b/w -1 and 1"
             , "\nmin averaged duet: ", l_bound_duet
             , "\nmax averaged duet: ", u_bound_duet))
  quit()
}
|
||||||
|
|
||||||
#***********
# Lig: average by position and then scale b/w -1 and 1
# column: ligand_affinity_change (NOT scaled!)
#***********
# Mirror of the duet section above: average RAW ligand-affinity values
# per position, then scale the averages.
mean_affinity_by_position <- df %>%
  group_by(position) %>%
  summarize(averaged_affinity = mean(ligand_affinity_change))

# scale b/w -1 and 1: negatives divided by |min|, positives by max
affinity_min = min(mean_affinity_by_position['averaged_affinity'])
affinity_max = max(mean_affinity_by_position['averaged_affinity'])

# FIX: vectorised ifelse() directly on the column instead of lapply()
# over a one-column data frame — same values, simpler column transform
mean_affinity_by_position$averaged_affinity_scaled = ifelse(mean_affinity_by_position$averaged_affinity < 0
                                                            , mean_affinity_by_position$averaged_affinity/abs(affinity_min)
                                                            , mean_affinity_by_position$averaged_affinity/affinity_max)

cat(paste0('Average affinity scores:\n', head(mean_affinity_by_position['averaged_affinity'])
           , '\n---------------------------------------------------------------'
           , '\nScaled affinity scores:\n', head(mean_affinity_by_position['averaged_affinity_scaled'])))

# sanity checks: scaled bounds must be exactly -1 and 1
l_bound_affinity = min(mean_affinity_by_position['averaged_affinity_scaled'])
u_bound_affinity = max(mean_affinity_by_position['averaged_affinity_scaled'])

if ( (l_bound_affinity == -1) && (u_bound_affinity == 1) ){
  # FIX: corrected typo "affintiy" -> "affinity" in the report strings
  cat(paste0("PASS: affinity scores averaged by position and then scaled"
             , "\nmin averaged affinity: ", l_bound_affinity
             , "\nmax averaged affinity: ", u_bound_affinity))
}else{
  # FIX: corrected typos "avergaed" and "affintiy" in the failure message
  cat(paste0("FAIL: averaged affinity scores could not be scaled b/w -1 and 1"
             , "\nmin averaged affinity: ", l_bound_affinity
             , "\nmax averaged affinity: ", u_bound_affinity))
  quit()
}
|
||||||
|
|
||||||
#***********
# merge: mean_duet_by_position and mean_affinity_by_position
#***********
# Merge the two per-position summaries on their shared column(s)
# (expected: "position"), keeping all rows from both sides.
common_cols = intersect(colnames(mean_duet_by_position), colnames(mean_affinity_by_position))

# FIX: the original condition `dim(a) && dim(b)` applied scalar && to
# integer vectors — an error in R >= 4.3, and in older R it only tested
# nrow != 0 without ever comparing the two shapes. Compare explicitly.
if ( all(dim(mean_duet_by_position) == dim(mean_affinity_by_position)) ){

  # FIX: message typo "mering" -> "merging"
  print(paste0("PASS: dim's match, merging dfs by column :", common_cols))

  combined_df = as.data.frame(merge(mean_duet_by_position
                                    , mean_affinity_by_position
                                    , by = common_cols
                                    , all = T))

  # sanity check
  # FIX: second label said "nrows" but reports ncol(combined_df)
  cat(paste0("\nnrows combined_df:", nrow(combined_df)
             , "\nncols combined_df:", ncol(combined_df)))

}else{
  # FIX: message said "aborting cbind!" though the code performs a merge
  cat(paste0("FAIL: dim's mismatch, aborting merge!"
             , "\nnrows df1:", nrow(mean_duet_by_position)
             , "\nnrows df2:", nrow(mean_affinity_by_position)))
  quit()
}
|
||||||
, "average_affinity_scaled")
|
|
||||||
|
|
||||||
colnames(combined)
|
|
||||||
|
|
||||||
identical(combined$position, combined$position2)
|
|
||||||
|
|
||||||
n = which(colnames(combined) == "position2"); n
|
|
||||||
|
|
||||||
combined_df = combined[,-n]
|
|
||||||
|
|
||||||
max(combined_df$average_duet_scaled) ; min(combined_df$average_duet_scaled)
|
|
||||||
|
|
||||||
max(combined_df$average_affinity_scaled) ; min(combined_df$average_affinity_scaled)
|
|
||||||
|
|
||||||
head(combined_df$position); tail(combined_df$position)
|
|
||||||
#%%============================================================
|
#%%============================================================
|
||||||
# output
|
# output
|
||||||
write.csv(combined_df, outfile_mean_stability
|
write.csv(combined_df, outfile_mean_stability
|
||||||
|
|
|
@ -79,7 +79,7 @@ if ( length(unique(my_df$mutationinformation)) != length(my_df$mutationinformati
|
||||||
|
|
||||||
upos = unique(my_df_u$position)
|
upos = unique(my_df_u$position)
|
||||||
cat("Dim of clean df:"); cat(dim(my_df_u))
|
cat("Dim of clean df:"); cat(dim(my_df_u))
|
||||||
cat("\nNo. of unique mutational positions:"); cat(length(upos))
|
cat("\nNo. of unique mutational positions:"); cat(length(upos), "\n")
|
||||||
|
|
||||||
########################################################################
|
########################################################################
|
||||||
# end of data extraction and cleaning for plots #
|
# end of data extraction and cleaning for plots #
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue