LSHTM_analysis/scripts/plotting/structure_figures/AFFINITY_TEST_prominent_effects.R

138 lines
No EOL
5.3 KiB
R

# Restrict the filtered affinity table to rows lying close to a binding
# partner: `foo` keeps rows with ligand_distance < 10, `bar` keeps rows with
# interface_dist < 10.
foo <- df3_affinity_filtered[df3_affinity_filtered$ligand_distance < 10, ]
bar <- df3_affinity_filtered[df3_affinity_filtered$interface_dist < 10, ]

# Two-sample Wilcoxon tests of each measure against the `sensitivity`
# grouping, on the ligand-distance subset (scaled and raw columns) ...
wilcox.test(foo$mmcsm_lig_scaled ~ foo$sensitivity)
wilcox.test(foo$mmcsm_lig ~ foo$sensitivity)
wilcox.test(foo$affinity_scaled ~ foo$sensitivity)
wilcox.test(foo$ligand_affinity_change ~ foo$sensitivity)

# ... and on the interface-distance subset.
wilcox.test(bar$mcsm_na_scaled ~ bar$sensitivity)
wilcox.test(bar$mcsm_na_affinity ~ bar$sensitivity)
wilcox.test(bar$mcsm_ppi2_scaled ~ bar$sensitivity)
wilcox.test(bar$mcsm_ppi2_affinity ~ bar$sensitivity)
# Find the most "impactful" effect value: the largest absolute value across
# the four scaled affinity columns of `a` (`a` must already exist in the
# session; it is not created in this part of the script).
a2 <- a[c("affinity_scaled", "mmcsm_lig_scaled", "mcsm_ppi2_scaled", "mcsm_na_scaled")]
biggest <- max(abs(a2))

# Logical matrix flagging the cell(s) whose magnitude equals the maximum.
abs(a2) == biggest
# A second comparison against `c(, biggest)` used to follow here. c() with an
# empty first argument raises "argument 1 is empty", which stopped the script
# when sourced, so that statement has been dropped; the comparison above
# already gives the intended result.

# Show the maximum itself and the columns it was taken from.
biggest
a2
#
# biggest = max(abs(a2[1,]))
#
# #hmm
# #which(abs(a2) == biggest)
# #names(a2)[apply(a2, 1:4, function(i) which(i == max()))]
#
# # get row max
# a2$row_maximum = apply(abs(a2[,-1]), 1, max)
#
# # get colname for abs(max_value)
# #https://stackoverflow.com/questions/36960010/get-column-name-that-matches-specific-row-value-in-dataframe
# #names(df)[which(df == 1, arr.ind=T)[, "col"]]
# # yayy
# names(a2)[which(abs(a2) == biggest, arr.ind=T)[, "col"]]
#
# #another:https://statisticsglobe.com/return-column-name-of-largest-value-for-each-row-in-r
# colnames(a2)[max.col(abs(a2), ties.method = "first")] # Apply colnames & max.col functions
# #################################################
# # use whole df
# #gene_aff_cols = c('affinity_scaled','mmcsm_lig_scaled','mcsm_ppi2_scaled','mcsm_na_scaled')
#
# biggest = max(abs(a[gene_aff_cols]))
# a$max_es = biggest
# a$effect = names(a[gene_aff_cols])[which(abs(a[gene_aff_cols]) == biggest, arr.ind=T)[, "col"]]
#
# effect_name = unique(a$effect)
# #get index of value of max effect
# ind = (which(abs(a[effect_name]) == biggest, arr.ind=T))
# a[effect_name][ind]
# # extract sign
# a$effect_sign = sign(a[effect_name][ind])
########################################################
# maxn <- function(n) function(x) order(x, decreasing = TRUE)[n]
# second_big = abs(a[gene_aff_cols])[maxn(2)(abs(a[gene_aff_cols])]
# apply(df, 1, function(x)x[maxn(1)(x)])
# apply(a[gene_aff_cols], 1, function(x) abs(a[gene_aff_cols])[maxn(2)(abs(a[gene_aff_cols]))])
#########################################################
# loop
# Pooled search over three positions: find the single largest absolute effect
# across gene_aff_cols, record which column it came from and its original sign.
a2 <- df2[df2$position %in% c(167, 423, 427), ]

# The grouped copy is kept as its own complete statement. Previously the chain
# ended in a trailing `%>%`, which pulled the following `biggest = ...`
# assignment into the pipeline and made the whole expression fail to evaluate.
test <- dplyr::group_by(a2, position)

biggest <- max(abs(a2[gene_aff_cols]))
a2$max_es <- biggest

# Column name(s) of gene_aff_cols whose magnitude equals the maximum.
a2$effect <- names(a2[gene_aff_cols])[
  which(abs(a2[gene_aff_cols]) == biggest, arr.ind = TRUE)[, "col"]
]
effect_name <- unique(a2$effect)

# get index of value of max effect
ind <- which(abs(a2[effect_name]) == biggest, arr.ind = TRUE)
a2[effect_name][ind]

# extract sign of the original (signed) value
a2$effect_dir <- sign(a2[effect_name][ind])
#################################
# Prototype of the per-position search. For every position in the subset the
# rows are copied into `a2` and annotated there; nothing is written back to
# df2_short, so after the loop only the last position's `a2` remains.
df2_short <- df2[df2$position %in% c(167, 423, 427), ]

for (i in unique(df2_short$position)) {
  # debugging aids:
  # print(i)
  # print(paste0("\nNo. of unique positions:", length(unique(df2$position))) )
  # cat(length(unique(df2$position)))
  a2 <- df2_short[df2_short$position == i, ]

  # largest absolute effect at this position
  biggest <- max(abs(a2[gene_aff_cols]))
  a2$max_es <- max(abs(a2[gene_aff_cols]))

  # name(s) of the column(s) holding that value
  a2$effect <- names(a2[gene_aff_cols])[
    which(abs(a2[gene_aff_cols]) == biggest, arr.ind = TRUE)[, "col"]
  ]
  effect_name <- unique(a2$effect)

  # row/col index of the maximum inside the winning column(s)
  ind <- (which(abs(a2[effect_name]) == biggest, arr.ind = TRUE))
  a2[effect_name][ind]

  # sign of the original value at that index
  a2$effect_sign <- sign(a2[effect_name][ind])
}
#========================
# Rows of df3 to annotate. Position sets used previously, kept for reference
# (each was immediately overwritten by the selection below):
#   c(167, 423, 427)
#   c(170, 167, 493, 453, 435, 433, 480, 456, 445)
df2_short <- df3[df3$position %in% c(435, 480), ]
# Return the cells of `df` found in the rows where `position` equals `x`
# and in the column(s) named by `y`.
#
# x  : position value to match against df$position
# y  : column name(s) to extract
# df : data frame to read from; defaults to the global `df2_short`, looked up
#      when the function is called (not when it is defined)
#
# Uses plain `[` indexing, so a single column in `y` comes back as a vector
# and several columns come back as a data frame.
give_col <- function(x, y, df = df2_short) {
  at_position <- df$position == x
  df[at_position, y]
}
# Annotate every row of df2_short with the most prominent effect seen at its
# position. Three columns are written (created on first use):
#   abs_max_effect : largest absolute value across gene_aff_cols among all
#                    rows sharing the position
#   effect_type    : name of the gene_aff_cols column holding that value
#   effect_sign    : sign() of the original, signed value (-1, 0 or 1)
#
# Exactly one winning cell is chosen per position. When several cells tie for
# the maximum, the first one in column-major order wins (earliest column in
# gene_aff_cols, then earliest row), so every assignment below is a single
# value and never depends on recycling. The column name is read straight from
# gene_aff_cols, so no ".1"-style suffixes from duplicated column selections
# can appear and no post-hoc suffix stripping is required.
# NA values are ignored when searching for the maximum; a position whose
# effect cells are all NA gets NA in the three output columns. Rows with an
# NA position are left untouched.
for (i in unique(df2_short$position[!is.na(df2_short$position)])) {
  pos_rows <- which(df2_short$position == i)

  # Signed effect values for this position as a plain numeric matrix.
  effects <- as.matrix(df2_short[pos_rows, gene_aff_cols, drop = FALSE])

  if (all(is.na(effects))) {
    df2_short[pos_rows, "abs_max_effect"] <- NA_real_
    df2_short[pos_rows, "effect_type"] <- NA_character_
    df2_short[pos_rows, "effect_sign"] <- NA_real_
    next
  }

  biggest <- max(abs(effects), na.rm = TRUE)

  # (row, col) of the first cell whose magnitude equals the maximum; which()
  # skips NA comparisons and reports hits in column-major order.
  hit <- which(abs(effects) == biggest, arr.ind = TRUE)[1, ]
  effect_name <- gene_aff_cols[hit[["col"]]]

  df2_short[pos_rows, "abs_max_effect"] <- biggest
  df2_short[pos_rows, "effect_type"] <- effect_name
  # Recover the direction from the un-abs()'d value at the winning cell.
  df2_short[pos_rows, "effect_sign"] <- sign(effects[hit[["row"]], hit[["col"]]])
}