calculating mean stabilty per position
please enter the commit message for your changes. Lines starting
This commit is contained in:
parent
bf4a427239
commit
38759c6b0c
5 changed files with 172 additions and 9 deletions
|
@ -17,7 +17,7 @@ spec = matrix(c(
|
|||
"gene" , "g", 1, "character"
|
||||
), byrow = TRUE, ncol = 4)
|
||||
|
||||
opt = getopt(spec);
|
||||
opt = getopt(spec)
|
||||
|
||||
drug = opt$drug
|
||||
gene = opt$gene
|
||||
|
|
|
@ -81,9 +81,9 @@ if ( length(unique(my_df$mutationinformation)) != length(my_df$mutationinformati
|
|||
my_df_u = my_df
|
||||
}
|
||||
|
||||
#upos = unique(my_df_u$position)
|
||||
upos = unique(my_df_u$position)
|
||||
cat('Dim of clean df:'); cat(dim(my_df_u))
|
||||
cat('\nNo. of unique mutational positions:'); cat(length(unique(my_df_u$position)))
|
||||
cat('\nNo. of unique mutational positions:'); cat(length(upos))
|
||||
|
||||
########################################################################
|
||||
# end of data extraction and cleaning for plots #
|
||||
|
|
|
@ -17,9 +17,9 @@ getwd()
|
|||
########################################################################
|
||||
# Read file: call script for combining df for PS #
|
||||
########################################################################
|
||||
|
||||
#source("../combining_two_df.R")
|
||||
#?????????????
|
||||
#
|
||||
|
||||
#########################################################
|
||||
#%% variable assignment: input and output paths & filenames
|
||||
drug = 'pyrazinamide'
|
||||
|
@ -87,9 +87,9 @@ if ( length(unique(my_df$mutationinformation)) != length(my_df$mutationinformati
|
|||
my_df_u = my_df
|
||||
}
|
||||
|
||||
#upos = unique(my_df_u$position)
|
||||
upos = unique(my_df_u$position)
|
||||
cat('Dim of clean df:'); cat(dim(my_df_u))
|
||||
cat('\nNo. of unique mutational positions:'); cat(length(unique(my_df_u$position)))
|
||||
cat('\nNo. of unique mutational positions:'); cat(length(upos))
|
||||
|
||||
########################################################################
|
||||
# end of data extraction and cleaning for plots #
|
||||
|
|
163
scripts/plotting/mcsm_mean_stability.R
Normal file
163
scripts/plotting/mcsm_mean_stability.R
Normal file
|
@ -0,0 +1,163 @@
|
|||
getwd()
|
||||
setwd('~/git/LSHTM_analysis/scripts/plotting')
|
||||
getwd()
|
||||
|
||||
#########################################################
|
||||
# TASK:
|
||||
|
||||
#########################################################
|
||||
source("Header_TT.R")
|
||||
require(data.table)
|
||||
require(dplyr)
|
||||
#========================================================
|
||||
# Read file: call script for combining df for PS
|
||||
#source("../combining_two_df.R")
|
||||
|
||||
#???????????
|
||||
|
||||
#========================================================
|
||||
#%% variable assignment: input and output paths & filenames
|
||||
drug = 'pyrazinamide'
|
||||
gene = 'pncA'
|
||||
gene_match = paste0(gene,'_p.')
|
||||
cat(gene_match)
|
||||
|
||||
#=============
|
||||
# directories
|
||||
#=============
|
||||
datadir = paste0('~/git/Data')
|
||||
indir = paste0(datadir, '/', drug, '/input')
|
||||
outdir = paste0('~/git/Data', '/', drug, '/output')
|
||||
|
||||
#======
|
||||
# input
|
||||
#======
|
||||
#in_filename = 'mcsm_complex1_normalised.csv'
|
||||
in_filename_params = paste0(tolower(gene), '_all_params.csv')
|
||||
infile_params = paste0(outdir, '/', in_filename_params)
|
||||
cat(paste0('Input file 1:', infile_params) )
|
||||
|
||||
#=======
|
||||
# output
|
||||
#=======
|
||||
out_filename_mean_stability = paste0(tolower(gene), "_mean_stability.csv")
|
||||
outfile_mean_stability = paste0(outdir, "/", out_filename_mean_stability)
|
||||
print(paste0("Output file:", outfile_mean_stability))
|
||||
|
||||
#%%===============================================================
|
||||
###########################
|
||||
# Read file: struct params
|
||||
###########################
|
||||
cat('Reading struct params including mcsm:', in_filename_params)
|
||||
|
||||
my_df = read.csv(infile_params
|
||||
#, stringsAsFactors = F
|
||||
, header = T)
|
||||
|
||||
cat('Input dimensions:', dim(my_df))
|
||||
|
||||
# clear variables
|
||||
rm(in_filename_params, infile_params)
|
||||
|
||||
# quick checks
|
||||
colnames(my_df)
|
||||
str(my_df)
|
||||
|
||||
# check for duplicate mutations
|
||||
if ( length(unique(my_df$mutationinformation)) != length(my_df$mutationinformation)){
|
||||
cat(paste0('CAUTION:', ' Duplicate mutations identified'
|
||||
, '\nExtracting these...'))
|
||||
dup_muts = my_df[duplicated(my_df$mutationinformation),]
|
||||
dup_muts_nu = length(unique(dup_muts$mutationinformation))
|
||||
cat(paste0('\nDim of duplicate mutation df:', nrow(dup_muts)
|
||||
, '\nNo. of unique duplicate mutations:', dup_muts_nu
|
||||
, '\n\nExtracting df with unique mutations only'))
|
||||
my_df_u = my_df[!duplicated(my_df$mutationinformation),]
|
||||
}else{
|
||||
cat(paste0('No duplicate mutations detected'))
|
||||
my_df_u = my_df
|
||||
}
|
||||
|
||||
upos = unique(my_df_u$position)
|
||||
cat('Dim of clean df:')
|
||||
cat(dim(my_df_u))
|
||||
cat('\nNo. of unique mutational positions:'); cat(length(upos))
|
||||
|
||||
########################################################################
|
||||
# end of data extraction and cleaning for plots #
|
||||
########################################################################
|
||||
#================
|
||||
# Data for plots
|
||||
#================
|
||||
# REASSIGNMENT as necessary
|
||||
df = my_df_u
|
||||
rm(my_df)
|
||||
|
||||
###########################
|
||||
# Data for bfactor figure
|
||||
# PS average
|
||||
# Lig average
|
||||
###########################
|
||||
head(df$position); head(df$mutationinformation)
|
||||
head(df$duet_scaled)
|
||||
|
||||
# order data frame
|
||||
#df = df[order(df$position),] #already done
|
||||
|
||||
head(df$position); head(df$mutationinformation)
|
||||
head(df$duet_scaled)
|
||||
|
||||
#***********
|
||||
# PS: average by position
|
||||
#***********
|
||||
mean_duet_by_position <- df %>%
|
||||
group_by(position) %>%
|
||||
summarize(averaged.duet = mean(duet_scaled))
|
||||
|
||||
#***********
|
||||
# Lig: average by position
|
||||
#***********
|
||||
mean_affinity_by_position <- df %>%
|
||||
group_by(position) %>%
|
||||
summarize(averaged.affinity = mean(affinity_scaled))
|
||||
|
||||
#***********
|
||||
# cbind:mean_duet_by_position and mean_affinity_by_position
|
||||
#***********
|
||||
|
||||
combined = as.data.frame(cbind(mean_duet_by_position, mean_affinity_by_position ))
|
||||
|
||||
# sanity check
|
||||
# mean_PS_affinity_Bfactor
|
||||
|
||||
colnames(combined)
|
||||
|
||||
colnames(combined) = c("position"
|
||||
, "average_duet_scaled"
|
||||
, "position2"
|
||||
, "average_affinity_scaled")
|
||||
|
||||
colnames(combined)
|
||||
|
||||
identical(combined$position, combined$position2)
|
||||
|
||||
n = which(colnames(combined) == "position2"); n
|
||||
|
||||
combined_df = combined[,-n]
|
||||
|
||||
max(combined_df$average_duet_scaled) ; min(combined_df$average_duet_scaled)
|
||||
|
||||
max(combined_df$average_affinity_scaled) ; min(combined_df$average_affinity_scaled)
|
||||
|
||||
head(combined_df$position); tail(combined_df$position)
|
||||
#%%============================================================
|
||||
# output
|
||||
write.csv(combined_df, outfile_mean_stability
|
||||
, row.names = F)
|
||||
cat("Finished writing file:\n"
|
||||
, outfile_mean_stability
|
||||
, "\nNo. of rows:", nrow(combined_df)
|
||||
, "\nNo. of cols:", ncol(combined_df))
|
||||
|
||||
# end of script
|
||||
#===============================================================
|
|
@ -80,9 +80,9 @@ if ( length(unique(my_df$mutationinformation)) != length(my_df$mutationinformati
|
|||
my_df_u = my_df
|
||||
}
|
||||
|
||||
#upos = unique(my_df_u$position)
|
||||
upos = unique(my_df_u$position)
|
||||
cat('Dim of clean df:'); cat(dim(my_df_u))
|
||||
cat('\nNo. of unique mutational positions:'); cat(length(unique(my_df_u$position)))
|
||||
cat('\nNo. of unique mutational positions:'); cat(length(upos))
|
||||
#======================================================
|
||||
# create a new df with unique position numbers and cols
|
||||
position = unique(my_df$position) #130
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue