still trying affinity phew
This commit is contained in:
parent
d94aa10c9b
commit
214e9232c6
1 changed files with 112 additions and 114 deletions
|
@ -3,7 +3,7 @@
|
|||
#source("~/git/LSHTM_analysis/config/gid.R")
|
||||
#source("~/git/LSHTM_analysis/config/embb.R")
|
||||
#source("~/git/LSHTM_analysis/config/katg.R")
|
||||
source("~/git/LSHTM_analysis/config/rpob.R")
|
||||
#source("~/git/LSHTM_analysis/config/rpob.R")
|
||||
|
||||
source("/home/tanu/git/LSHTM_analysis/my_header.R")
|
||||
#########################################################
|
||||
|
@ -57,15 +57,17 @@ table(df3$sensitivity)
|
|||
|
||||
length(unique((df3$mutationinformation)))
|
||||
all_colnames = as.data.frame(colnames(df3))
|
||||
|
||||
# FIXME: ADD distance to NA when SP replies
|
||||
dist_columns = c("ligand_distance", "interface_dist")
|
||||
DistCutOff = 10
|
||||
common_cols = c("mutationinformation"
|
||||
, "X5uhc_position"
|
||||
, "X5uhc_offset"
|
||||
, "position"
|
||||
, "dst_mode"
|
||||
, "mutation_info_labels"
|
||||
, "sensitivity"
|
||||
, "ligand_distance"
|
||||
, "interface_dist")
|
||||
, "sensitivity", dist_columns )
|
||||
|
||||
all_colnames$`colnames(df3)`[grep("scaled", all_colnames$`colnames(df3)`)]
|
||||
all_colnames$`colnames(df3)`[grep("outcome", all_colnames$`colnames(df3)`)]
|
||||
|
@ -73,20 +75,20 @@ all_colnames$`colnames(df3)`[grep("outcome", all_colnames$`colnames(df3)`)]
|
|||
#===================
|
||||
# stability cols
|
||||
#===================
|
||||
# raw_cols_stability = c("duet_stability_change"
|
||||
# , "deepddg"
|
||||
# , "ddg_dynamut2"
|
||||
# , "ddg_foldx")
|
||||
#
|
||||
# scaled_cols_stability = c("duet_scaled"
|
||||
# , "deepddg_scaled"
|
||||
# , "ddg_dynamut2_scaled"
|
||||
# , "foldx_scaled")
|
||||
#
|
||||
# outcome_cols_stability = c("duet_outcome"
|
||||
# , "deepddg_outcome"
|
||||
# , "ddg_dynamut2_outcome"
|
||||
# , "foldx_outcome")
|
||||
raw_cols_stability = c("duet_stability_change"
|
||||
, "deepddg"
|
||||
, "ddg_dynamut2"
|
||||
, "ddg_foldx")
|
||||
|
||||
scaled_cols_stability = c("duet_scaled"
|
||||
, "deepddg_scaled"
|
||||
, "ddg_dynamut2_scaled"
|
||||
, "foldx_scaled")
|
||||
|
||||
outcome_cols_stability = c("duet_outcome"
|
||||
, "deepddg_outcome"
|
||||
, "ddg_dynamut2_outcome"
|
||||
, "foldx_outcome")
|
||||
|
||||
#===================
|
||||
# affinity cols
|
||||
|
@ -123,113 +125,109 @@ outcome_cols_affinity = c( "ligand_outcome"
|
|||
# #consurf outcome doesn't exist
|
||||
# )
|
||||
|
||||
######################################################################
|
||||
cols_to_consider = colnames(df3)[colnames(df3)%in%c(common_cols
|
||||
, raw_cols_affinity
|
||||
, scaled_cols_affinity
|
||||
, outcome_cols_affinity
|
||||
# , raw_cols_stability
|
||||
# , scaled_cols_stability
|
||||
# , outcome_cols_stability
|
||||
)]
|
||||
gene_aff_cols = colnames(df3)[colnames(df3)%in%scaled_cols_affinity]
|
||||
gene_stab_cols = colnames(df3)[colnames(df3)%in%scaled_cols_stability]
|
||||
gene_common_cols = colnames(df3)[colnames(df3)%in%common_cols]
|
||||
|
||||
cols_to_extract = cols_to_consider[cols_to_consider%in%c(common_cols
|
||||
, raw_cols_affinity
|
||||
, scaled_cols_affinity)]
|
||||
sel_cols = c(gene_common_cols
|
||||
, gene_stab_cols
|
||||
, gene_aff_cols)
|
||||
|
||||
df3_plot = df3[, cols_to_extract]
|
||||
|
||||
DistCutOff_colnames = c("ligand_distance", "interface_dist")
|
||||
DistCutOff = 10
|
||||
#########################################
|
||||
#df3_plot = df3[, cols_to_extract]
|
||||
df3_plot = df3[, sel_cols]
|
||||
|
||||
######################
|
||||
#FILTERING HMMMM!
|
||||
#all dist <10
|
||||
#for embb this results in 2 muts
|
||||
######################
|
||||
df3_affinity_filtered = df3_plot[ (df3_plot$ligand_distance<10 | df3_plot$interface_dist <10),]
|
||||
df3_affinity_filtered = df3_plot[ (df3_plot$ligand_distance<10 & df3_plot$interface_dist <10),]
|
||||
|
||||
c0u = unique(df3_affinity_filtered$position)
|
||||
length(c0u)
|
||||
|
||||
foo = df3_affinity_filtered[df3_affinity_filtered$ligand_distance<10,]
|
||||
bar = df3_affinity_filtered[df3_affinity_filtered$interface_dist<10,]
|
||||
|
||||
wilcox.test(foo$mmcsm_lig_scaled~foo$sensitivity)
|
||||
wilcox.test(foo$mmcsm_lig~foo$sensitivity)
|
||||
|
||||
wilcox.test(foo$affinity_scaled~foo$sensitivity)
|
||||
wilcox.test(foo$ligand_affinity_change~foo$sensitivity)
|
||||
|
||||
wilcox.test(bar$mcsm_na_scaled~bar$sensitivity)
|
||||
wilcox.test(bar$mcsm_na_affinity~bar$sensitivity)
|
||||
|
||||
wilcox.test(bar$mcsm_ppi2_scaled~bar$sensitivity)
|
||||
wilcox.test(bar$mcsm_ppi2_affinity~bar$sensitivity)
|
||||
|
||||
##############################################################
|
||||
df = df3_affinity_filtered
|
||||
#df = df3_affinity_filtered
|
||||
##########################################
|
||||
#NO FILTERING: annotate the whole df
|
||||
df = df3_plot
|
||||
sum(is.na(df))
|
||||
df2 = na.omit(df) # Apply na.omit function
|
||||
df2 = na.omit(df)
|
||||
c0u = unique(df2$position)
|
||||
length(c0u)
|
||||
|
||||
a = df2[df2$position==37,]
|
||||
sel_cols = c("mutationinformation", "position", scaled_cols_affinity)
|
||||
a = a[, sel_cols]
|
||||
# reassign orig
|
||||
my_df_raw = df3
|
||||
|
||||
##############################################################
|
||||
# FIXME: ADD distance to NA when SP replies
|
||||
|
||||
#####################
|
||||
# Ensemble affinity: affinity_cols
|
||||
# mcsm_lig, mmcsm_lig and mcsm_na
|
||||
#####################
|
||||
# extract outcome cols and map numeric values to the categories
|
||||
# Destabilising == 0, and stabilising == 1 so rescaling can let -1 be destabilising
|
||||
#########################################
|
||||
#=====================================
|
||||
# Affintiy (2 cols): average the scores
|
||||
# across predictors ==> average by
|
||||
# position ==> scale b/w -1 and 1
|
||||
|
||||
# column to average: ens_affinity
|
||||
#=====================================
|
||||
cols_mcsm_lig = c("mutationinformation"
|
||||
, "position"
|
||||
, "sensitivity"
|
||||
, "X5uhc_position"
|
||||
, "X5uhc_offset"
|
||||
, "ligand_distance"
|
||||
, "ligand_outcome"
|
||||
, "mmcsm_lig_outcome")
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
######################################################################
|
||||
##################
|
||||
# merge: mean ensemble stability and affinity by_position
|
||||
####################
|
||||
# if ( class(mean_ens_stability_by_position) && class(mean_ens_affinity_by_position) != "data.frame"){
|
||||
# cat("Y")
|
||||
# }
|
||||
|
||||
common_cols = intersect(colnames(mean_ens_stability_by_position), colnames(mean_ens_affinity_by_position))
|
||||
|
||||
if (dim(mean_ens_stability_by_position) && dim(mean_ens_affinity_by_position)){
|
||||
print(paste0("PASS: dim's match, mering dfs by column :", common_cols))
|
||||
#combined = as.data.frame(cbind(mean_duet_by_position, mean_affinity_by_position ))
|
||||
combined_df = as.data.frame(merge(mean_ens_stability_by_position
|
||||
, mean_ens_affinity_by_position
|
||||
, by = common_cols
|
||||
, all = T))
|
||||
|
||||
cat(paste0("\nnrows combined_df:", nrow(combined_df)
|
||||
, "\nnrows combined_df:", ncol(combined_df)))
|
||||
}else{
|
||||
cat(paste0("FAIL: dim's mismatch, aborting cbind!"
|
||||
, "\nnrows df1:", nrow(mean_duet_by_position)
|
||||
, "\nnrows df2:", nrow(mean_affinity_by_position)))
|
||||
quit()
|
||||
# now subset
|
||||
df3 = df2
|
||||
#######################################################
|
||||
#=================
|
||||
# affinity effect
|
||||
#=================
|
||||
give_col=function(x,y,df=df3){
|
||||
df[df$position==x,y]
|
||||
}
|
||||
|
||||
for (i in unique(df3$position) ){
|
||||
#print(i)
|
||||
biggest = max(abs(give_col(i,gene_aff_cols)))
|
||||
|
||||
df3[df3$position==i,'abs_max_effect'] = biggest
|
||||
df3[df3$position==i,'effect_type']= names(
|
||||
give_col(i,gene_aff_cols)[which(
|
||||
abs(
|
||||
give_col(i,gene_aff_cols)
|
||||
) == biggest, arr.ind=T
|
||||
)[, "col"]])
|
||||
|
||||
# effect_name = unique(df3[df3$position==i,'effect_type'])
|
||||
effect_name = df3[df3$position==i,'effect_type'][1] # pick first one in case we have multiple exact values
|
||||
|
||||
ind = rownames(which(abs(df3[df3$position==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T))
|
||||
df3[df3$position==i,'effect_sign'] = sign(df3[effect_name][ind,])
|
||||
}
|
||||
|
||||
df3$effect_type = sub("\\.[0-9]+", "", df3$effect_type) # cull duplicate effect types that happen when there are exact duplicate values
|
||||
df3U = df3[!duplicated(df3[c('position')]), ]
|
||||
table(df3U$effect_type)
|
||||
#########################################################
|
||||
#%% consider stability as well
|
||||
df4 = df2
|
||||
|
||||
#=================
|
||||
# stability + affinity effect
|
||||
#=================
|
||||
effect_cols = c(gene_aff_cols, gene_stab_cols)
|
||||
|
||||
give_col=function(x,y,df=df4){
|
||||
df[df$position==x,y]
|
||||
}
|
||||
|
||||
for (i in unique(df4$position) ){
|
||||
#print(i)
|
||||
biggest = max(abs(give_col(i,effect_cols)))
|
||||
|
||||
df4[df4$position==i,'abs_max_effect'] = biggest
|
||||
df4[df4$position==i,'effect_type']= names(
|
||||
give_col(i,effect_cols)[which(
|
||||
abs(
|
||||
give_col(i,effect_cols)
|
||||
) == biggest, arr.ind=T
|
||||
)[, "col"]])
|
||||
|
||||
# effect_name = unique(df4[df4$position==i,'effect_type'])
|
||||
effect_name = df4[df4$position==i,'effect_type'][1] # pick first one in case we have multiple exact values
|
||||
|
||||
ind = rownames(which(abs(df4[df4$position==i,c('position',effect_name)][effect_name])== biggest, arr.ind=T))
|
||||
df4[df4$position==i,'effect_sign'] = sign(df4[effect_name][ind,])
|
||||
}
|
||||
|
||||
df4$effect_type = sub("\\.[0-9]+", "", df4$effect_type) # cull duplicate effect types that happen when there are exact duplicate values
|
||||
df4U = df4[!duplicated(df4[c('position')]), ]
|
||||
table(df4U$effect_type)
|
||||
|
||||
#%%============================================================
|
||||
# output
|
||||
write.csv(combined_df, outfile_mean_ens_st_aff
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue