# File listing metadata: 128 lines, 3.2 KiB, R
# Report the starting directory, switch to the plotting scripts folder, and
# confirm the change. The relative source() calls below are resolved against
# the working directory, so this has to happen first.
getwd()
setwd("~/git/LSHTM_analysis/scripts/plotting")
getwd()

#########################################################
# TASK: average duet_scaled and affinity_scaled by position
# and write the per-position means to a csv file
#########################################################
source("Header_TT.R")

# library() stops immediately when a package is not installed; require() would
# only warn and return FALSE, so the failure would surface later and less
# clearly.
library(data.table)
library(dplyr)

# plotting_data.R is expected to provide:
#   my_df
#   my_df_u
#   dup_muts
source("plotting_data.R")

#========================================================
# Read file: call script for combining df for PS
#source("../combining_two_df.R")
#========================================================
#%% variable assignment: input and output paths & filenames
drug <- "pyrazinamide"
gene <- "pncA"
gene_match <- paste0(gene, "_p.")
# cat() does not append a newline; add one so later messages start on a new line
cat(gene_match, "\n", sep = "")

#=============
# directories
#=============
# Both directories derive from the single datadir root so they cannot drift
# apart. file.path() joins with "/" and yields the same strings as before.
datadir <- "~/git/Data"
indir <- file.path(datadir, drug, "input")
outdir <- file.path(datadir, drug, "output")

#======
# input
#======
#in_filename = "mcsm_complex1_normalised.csv"
in_filename_params <- paste0(tolower(gene), "_all_params.csv")
# NOTE: the params file is located under outdir (not indir), as in the original
infile_params <- file.path(outdir, in_filename_params)
cat(paste0("Input file 1:", infile_params), "\n", sep = "")

#=======
# output
#=======
out_filename_mean_stability <- paste0(tolower(gene), "_mean_stability.csv")
outfile_mean_stability <- file.path(outdir, out_filename_mean_stability)
print(paste0("Output file:", outfile_mean_stability))
#%%===============================================================

#================
# Data for plots
#================
# REASSIGNMENT as necessary
# my_df_u and my_df are expected to come from the sourced plotting_data.R;
# only my_df_u is used from here on, so my_df is dropped.
df <- my_df_u
rm(my_df)

###########################
# Data for bfactor figure
# PS average
# Lig average
###########################
# Quick look at the columns used below
head(df$position); head(df$mutationinformation)
head(df$duet_scaled)

# order data frame
#df = df[order(df$position),] #already done

#***********
# PS and Lig: average by position
#***********
# Both means are computed in a single grouped summary, so each row is tied to
# its position by construction. This replaces the earlier approach of
# cbind()-ing two separate summaries, renaming, and dropping the duplicated
# position column by index.
# NOTE(review): mean() is called without na.rm, so any NA in a position's
# scores gives NA for that position -- confirm this is intended.
combined_df <- df %>%
  group_by(position) %>%
  summarise(
    average_duet_scaled = mean(duet_scaled),
    average_affinity_scaled = mean(affinity_scaled)
  ) %>%
  ungroup() %>%
  as.data.frame()

# sanity check (enforced): expected columns, and one row per distinct position
stopifnot(
  identical(
    colnames(combined_df),
    c("position", "average_duet_scaled", "average_affinity_scaled")
  ),
  nrow(combined_df) == length(unique(df$position))
)
colnames(combined_df)

# Value ranges of the averaged scores
max(combined_df$average_duet_scaled); min(combined_df$average_duet_scaled)
max(combined_df$average_affinity_scaled); min(combined_df$average_affinity_scaled)

# First and last positions covered
head(combined_df$position); tail(combined_df$position)
#%%============================================================
# output
# Write the per-position means without a row-name column.
# FALSE is spelled out because F is an ordinary variable that can be reassigned.
write.csv(combined_df, outfile_mean_stability, row.names = FALSE)

# Report what was written; the trailing "\n" ends the message cleanly because
# cat() does not add a newline itself.
cat(
  "Finished writing file:\n",
  outfile_mean_stability,
  "\nNo. of rows:", nrow(combined_df),
  "\nNo. of cols:", ncol(combined_df),
  "\n"
)

# end of script
#===============================================================