# scripts/plotting/structure_figures/AFFINITY_TEST_prominent_effects.R
#
# Exploratory script with two parts:
#   1. Wilcoxon tests of affinity measures against `sensitivity`, restricted to
#      rows with ligand_distance < 10 or interface_dist < 10.
#   2. Prototypes for finding, per position, the affinity column with the
#      largest absolute value ("most prominent effect"), plus the sign of it.
#
# The script expects these objects to exist already; none are defined here:
#   df3_affinity_filtered, a, df2, df3 (data frames) and gene_aff_cols
#   (character vector of affinity column names).
# NOTE(review): `a` looks like a small data-frame slice used for prototyping;
# confirm where it comes from before sourcing this file.

# Wilcoxon tests: affinity vs sensitivity ----

foo <- df3_affinity_filtered[df3_affinity_filtered$ligand_distance < 10, ]
bar <- df3_affinity_filtered[df3_affinity_filtered$interface_dist < 10, ]

wilcox.test(foo$mmcsm_lig_scaled ~ foo$sensitivity)
wilcox.test(foo$mmcsm_lig ~ foo$sensitivity)

wilcox.test(foo$affinity_scaled ~ foo$sensitivity)
wilcox.test(foo$ligand_affinity_change ~ foo$sensitivity)

wilcox.test(bar$mcsm_na_scaled ~ bar$sensitivity)
wilcox.test(bar$mcsm_na_affinity ~ bar$sensitivity)

wilcox.test(bar$mcsm_ppi2_scaled ~ bar$sensitivity)
wilcox.test(bar$mcsm_ppi2_affinity ~ bar$sensitivity)

# Find the most "impactful" effect value (prototype on `a`) ----

# Hoisted: the same four columns were spelled out in every expression below.
scaled_affinity_cols <- c(
  'affinity_scaled', 'mmcsm_lig_scaled', 'mcsm_ppi2_scaled', 'mcsm_na_scaled'
)

biggest <- max(abs(a[scaled_affinity_cols]))

abs(a[scaled_affinity_cols]) == biggest

# Disabled: `c(, biggest)` has an empty first argument and stops the script
# with "argument 1 is empty". The comparison above already covers the intent.
# abs(a[scaled_affinity_cols]) == c(, biggest)

max(abs(a[scaled_affinity_cols]))

a2 <- a[scaled_affinity_cols]
a2
#
# biggest = max(abs(a2[1,]))
#
# #hmm
# #which(abs(a2) == biggest)
# #names(a2)[apply(a2, 1:4, function(i) which(i == max()))]
#
# # get row max
# a2$row_maximum = apply(abs(a2[,-1]), 1, max)
#
# # get colname for abs(max_value)
# #https://stackoverflow.com/questions/36960010/get-column-name-that-matches-specific-row-value-in-dataframe
# #names(df)[which(df == 1, arr.ind=T)[, "col"]]
# # yayy
# names(a2)[which(abs(a2) == biggest, arr.ind=T)[, "col"]]
#
# #another:https://statisticsglobe.com/return-column-name-of-largest-value-for-each-row-in-r
# colnames(a2)[max.col(abs(a2), ties.method = "first")] # Apply colnames & max.col functions
# #################################################
# # use whole df
# #gene_aff_cols = c('affinity_scaled','mmcsm_lig_scaled','mcsm_ppi2_scaled','mcsm_na_scaled')
#
# biggest = max(abs(a[gene_aff_cols]))
# a$max_es = biggest
# a$effect = names(a[gene_aff_cols])[which(abs(a[gene_aff_cols]) == biggest, arr.ind=T)[, "col"]]
#
# effect_name = unique(a$effect)
# #get index of value of max effect
# ind = (which(abs(a[effect_name]) == biggest, arr.ind=T))
# a[effect_name][ind]
# # extract sign
# a$effect_sign = sign(a[effect_name][ind])
########################################################
# maxn <- function(n) function(x) order(x, decreasing = TRUE)[n]
# second_big = abs(a[gene_aff_cols])[maxn(2)(abs(a[gene_aff_cols])]
# apply(df, 1, function(x)x[maxn(1)(x)])
# apply(a[gene_aff_cols], 1, function(x) abs(a[gene_aff_cols])[maxn(2)(abs(a[gene_aff_cols]))])
#########################################################

# Prototype on a few positions of df2 (all rows pooled) ----

a2 <- df2[df2$position %in% c(167, 423, 427), ]

# The original pipe was left dangling (`... %>%` directly followed by
# `biggest = ...`), which made the whole statement an invalid assignment.
# The grouping is now a complete statement of its own; the namespace-qualified
# call also removes the need for `%>%` to be attached.
test <- dplyr::group_by(a2, position)

biggest <- max(abs(a2[gene_aff_cols]))
a2$max_es <- biggest
a2$effect <- names(a2[gene_aff_cols])[
  which(abs(a2[gene_aff_cols]) == biggest, arr.ind = TRUE)[, "col"]
]
effect_name <- unique(a2$effect)

# get index of value of max effect
ind <- which(abs(a2[effect_name]) == biggest, arr.ind = TRUE)
a2[effect_name][ind]
# extract sign
a2$effect_dir <- sign(a2[effect_name][ind])

# Prototype loop over positions of df2 ----
# NOTE: `a2` is overwritten on every iteration and nothing is written back to
# df2_short, so after the loop only the last position's result is left in `a2`.

df2_short <- df2[df2$position %in% c(167, 423, 427), ]

for (i in unique(df2_short$position)) {
  # print(i)
  # print(paste0("\nNo. of unique positions:", length(unique(df2$position))))
  # cat(length(unique(df2$position)))
  a2 <- df2_short[df2_short$position == i, ]
  biggest <- max(abs(a2[gene_aff_cols]))
  a2$max_es <- biggest
  a2$effect <- names(a2[gene_aff_cols])[
    which(abs(a2[gene_aff_cols]) == biggest, arr.ind = TRUE)[, "col"]
  ]
  effect_name <- unique(a2$effect)

  # get index of value of max effect
  ind <- which(abs(a2[effect_name]) == biggest, arr.ind = TRUE)
  a2[effect_name][ind]
  # extract sign
  a2$effect_sign <- sign(a2[effect_name][ind])
}

# Per-position most prominent effect on df3 ----

# Alternative position sets tried earlier. They were dead stores, each
# overwritten by the next assignment, so they are kept only for reference:
# df2_short = df3[df3$position%in%c(167, 423, 427),]
# df2_short = df3[df3$position%in%c(170, 167, 493, 453, 435, 433, 480, 456, 445),]
df2_short <- df3[df3$position %in% c(435, 480), ]

# Return column(s) `y` of the rows of `df` whose position equals `x`.
# `df` defaults to the global df2_short as it is at call time.
give_col <- function(x, y, df = df2_short) {
  df[df$position == x, y]
}

# Adds three columns to df2_short, constant within each position:
#   abs_max_effect: largest absolute value over gene_aff_cols at that position
#   effect_type:    name of the column holding that value
#   effect_sign:    sign (-1, 0, 1) of the original value
# Created up front so that they exist even if every position is skipped.
df2_short$abs_max_effect <- rep(NA_real_, nrow(df2_short))
df2_short$effect_type <- rep(NA_character_, nrow(df2_short))
df2_short$effect_sign <- rep(NA_real_, nrow(df2_short))

positions <- unique(df2_short$position)
for (i in positions[!is.na(positions)]) {
  # print(i)
  # print(paste0("\nNo. of unique positions:", length(unique(df2$position))))
  # cat(length(unique(df2$position)))
  at_pos <- which(df2_short$position == i)
  effects <- as.matrix(df2_short[at_pos, gene_aff_cols, drop = FALSE])
  stopifnot(is.numeric(effects))
  abs_effects <- abs(effects)

  # which.max() returns the FIRST maximum in column-major order and ignores
  # NA/NaN. That gives exactly one winning cell per position, so exact
  # duplicate values no longer yield vectors of the wrong length, and missing
  # affinity values no longer abort the loop.
  top <- which.max(abs_effects)
  if (length(top) == 0L) {
    next # every affinity value at this position is missing; leave NAs
  }

  df2_short[at_pos, "abs_max_effect"] <- abs_effects[top]
  df2_short[at_pos, "effect_type"] <- colnames(effects)[col(effects)[top]]
  # the sign is read from the untransformed value of the winning cell
  df2_short[at_pos, "effect_sign"] <- sign(effects[top])
}

# Strip a ".<digits>" suffix from effect types. The previous implementation
# produced such suffixes when there were exact duplicate values; kept so the
# output stays the same as before.
df2_short$effect_type <- sub("\\.[0-9]+", "", df2_short$effect_type)