added output tables with active site

This commit is contained in:
Tanushree Tunstall 2022-08-26 21:50:33 +01:00
parent f290d8ec9e
commit 2cbc460f87
4 changed files with 43 additions and 17 deletions

View file

@ -1,7 +1,15 @@
# Commented-out source() calls (embb config and plotting helpers); not executed.
#source("~/git/LSHTM_analysis/config/embb.R")
#source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R")
#source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#=======
# Input
#=======
# Run the gid config script, then the plotting-dataframe script.
# NOTE(review): `gene` (used below) is not assigned in the visible lines;
# presumably one of these sourced scripts defines it -- confirm.
source("~/git/LSHTM_analysis/config/gid.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
#=======
# output
#=======
# Output directory for plots: fixed image path plus the lower-cased gene name.
outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/")
# Report the resolved directory.
cat("\nOutput dir for plots:", outdir_images)
#### corr plot function: ggpairs ####
my_gg_pairs=function(plot_df, plot_title
, tt_args_size = 2.5
, gp_args_size = 2.5){
@ -38,8 +46,10 @@ my_gg_pairs=function(plot_df, plot_title
# face="bold"))
}
#### Data for ggpairs ####
# Distance cutoff value (units are not stated here -- TODO confirm).
DistCutOff = 10
###########################################################################
# Lower-case gene names grouped into three vectors. How each vector is used is
# not visible in this hunk. Note that "rpob" appears in both geneL_na and
# geneL_ppi2.
geneL_normal = c("pnca")
geneL_na = c("gid", "rpob")
geneL_ppi2 = c("alr", "embb", "katg", "rpob")
@ -53,7 +63,7 @@ corr_plotdf = corr_data_extract(merged_df3
# Names of the corr_plotdf columns whose name contains "Dist" (case-sensitive match).
aff_dist_cols = colnames(corr_plotdf)[grep("Dist", colnames(corr_plotdf))]
# Character vector of column labels.
# NOTE(review): this view lists "Log10(OR)" both as an active entry and as a
# commented-out entry; the +/- diff markers are not shown, so confirm against
# the file which of the two is current.
static_cols = c("Log10(MAF)"
, "Log10(OR)"
#, "Log10(OR)"
)
############################################################
#=============================================
@ -148,7 +158,7 @@ plot_corr_df_aff = my_gg_pairs(corr_df_aff
#, gp_args_size = 4
)
#### Combine plots #####
#### Combine plots (OLD, AB + C) #####
# #png("/home/tanu/tmp/gg_pairs_all.png", height = 6, width=11.75, unit="in",res=300)
# png(paste0(outdir_images
# ,tolower(gene)

View file

@ -136,8 +136,6 @@ bar = bar[, c("mutationinformation"
# Frequency table of the sensitivity column.
table(bar$sensitivity)
# Cross-check: count of rows where or_mychisq > 1 AND signif_fdr holds.
table(bar$or_mychisq>1&bar$signif_fdr) # sen and res ~ OR
# Print the structure of bar.
str(bar)
# Rows with or_mychisq below 1. With a logical row index, rows where
# or_mychisq is NA come back as all-NA rows; na.omit() on the next line then
# drops every row that contains an NA in any column.
sen = bar[bar$or_mychisq<1,]
sen = na.omit(sen)
@ -162,7 +160,9 @@ if (nrow(bar_or) == nrow(sen1) + nrow(res1) ){
# percent for OR muts
# Proportion (0-1) of res1 rows out of the sen1 + res1 rows; it is multiplied
# by 100 in the message below to report a percentage.
pc_orR = nrow(res1)/(nrow(sen1) + nrow(res1)); pc_orR
# NOTE(review): two `cat(` opening lines follow back to back. The hunk header
# reports 7 lines before and 9 after, so one opener is presumably the line this
# commit replaced; as rendered here the text would not parse. Confirm against
# the file.
cat("\nPercentage of muts with OR>1 i.e resistant:"
cat("Number of R muts with OR>1:", nrow(res1)
, "\nPercentage of muts with OR>1 i.e resistant:"
, pc_orR *100 )
# muts with highest OR
@ -171,22 +171,23 @@ head(bar_or$mutationinformation, 10)
# sort df
# Reorder rows by or_mychisq, with the listed distance columns as tie-breakers,
# all in decreasing order.
# NOTE(review): `T` is a plain variable in R and can be reassigned; `TRUE` is
# the safe spelling.
bar_or = bar_or[order(bar_or$or_mychisq
, bar_or$ligand_distance
, bar_or$interface_dist
, bar_or$nca_dist
#, bar_or$interface_dist
, decreasing = T), ]
nrow(bar_or)
# Label each row by whether its position is in the matching aa_pos_* vector
# ("no" otherwise). The aa_pos_* vectors are not defined in the visible lines.
# NOTE(review): this view shows two sets of site columns (dsl/ca/cdl and
# rna/sam/amp) but table() calls only for drug/dsl/ca/cdl. If the dsl/ca/cdl
# assignments are the lines this commit removed, those table() calls read
# columns that no longer exist -- confirm against the file.
bar_or$drug_site = ifelse(bar_or$position%in%aa_pos_drug, "drug", "no")
table(bar_or$drug_site)
bar_or$dsl_site = ifelse(bar_or$position%in%aa_pos_dsl, "dsl", "no")
bar_or$rna_site = ifelse(bar_or$position%in%aa_pos_rna, "rna", "no")
table(bar_or$dsl_site)
bar_or$ca_site = ifelse(bar_or$position%in%aa_pos_ca, "ca", "no")
bar_or$sam_site = ifelse(bar_or$position%in%aa_pos_sam, "sam", "no")
table(bar_or$ca_site)
bar_or$cdl_site = ifelse(bar_or$position%in%aa_pos_cdl, "cdl", "no")
bar_or$amp_site = ifelse(bar_or$position%in%aa_pos_amp, "amp", "no")
table(bar_or$cdl_site)
# First ten rows of the sorted data frame.
# NOTE(review): `[1:10,]` pads the result with all-NA rows when bar_or has
# fewer than ten rows; head(bar_or, 10) would not.
top10_or = bar_or[1:10,]
# are these active sites

View file

@ -285,6 +285,7 @@ if (identical(colnames(mut_h_avs_dd), colnames(mut_h_avs_ss)) ){
#-------------------
# Filtered columns
# most DD/SS: ligand
# FIXME DUBIOUS as min and max can be both negative
#-------------------
# Keep the rows whose value in the column named by LigDist_colname is below
# DistCutOff. Rows where that value is NA come back as all-NA rows, because a
# logical row index containing NA selects an NA row.
df3_effects_lig = df3_effects[df3_effects[[LigDist_colname]]<DistCutOff,]
nrow(df3_effects_lig)
@ -296,7 +297,7 @@ if (identical(colnames(mut_h_lig_dd), colnames(mut_h_lig_ss)) ){
# add cols
# Tag each data frame with a fixed mutational_effect label.
mut_h_lig_dd$mutational_effect = "Most Destabilising for Ligand affinity"
# NOTE(review): mut_h_lig_ss$mutational_effect is assigned on two consecutive
# lines in this view. The hunk header reports the same line count before and
# after, so one is presumably the replaced line; if both run, the second value
# is the one that is kept. Confirm against the file.
mut_h_lig_ss$mutational_effect = "Most Stabilising for Ligand affinity"
mut_h_lig_ss$mutational_effect = "CAUTION: Most DE/Stabilising for Ligand affinity"
cat("\nPass: avg ligand affinity")
}else{
@ -415,6 +416,20 @@ if ( length(colnames(combined_table)) == length(colsNames_combined_table) ) {
stop("\nAbort: No. of cols mismatch. Cannot assign pretty colnames for output")
}
nrow(combined_table)
# Add one label column per site type: the site name when the row's position is
# in the matching aa_pos_* vector, "no" otherwise. Each column is tabulated
# straight after it is created. The aa_pos_* vectors are not defined in the
# visible lines.
combined_table$drug_site = ifelse(combined_table$position%in%aa_pos_drug, "drug", "no")
table(combined_table$drug_site)
combined_table$rna_site = ifelse(combined_table$position%in%aa_pos_rna, "rna", "no")
table(combined_table$rna_site)
combined_table$sam_site = ifelse(combined_table$position%in%aa_pos_sam, "sam", "no")
table(combined_table$sam_site)
combined_table$amp_site = ifelse(combined_table$position%in%aa_pos_amp, "amp", "no")
table(combined_table$amp_site)
#--------------------
# write output file: KS test within group
#----------------------