# Listing metadata from the web viewer: R, 138 lines, 5.3 KiB, no EOL at end of file.
# Compare affinity-related scores between the groups in `sensitivity` with
# wilcox.test(), once on the raw score and once on its scaled counterpart.
# wilcox.test()'s formula interface needs `sensitivity` to have exactly two
# levels.

# Rows with ligand_distance below 10. which() is used so that rows whose
# distance is NA are dropped instead of coming back as all-NA rows, which is
# what plain logical indexing does.
foo <- df3_affinity_filtered[which(df3_affinity_filtered$ligand_distance < 10), ]

# Rows with interface_dist below 10 (same NA handling as above).
bar <- df3_affinity_filtered[which(df3_affinity_filtered$interface_dist < 10), ]

# Ligand-affinity scores (subset `foo`).
wilcox.test(mmcsm_lig_scaled ~ sensitivity, data = foo)
wilcox.test(mmcsm_lig ~ sensitivity, data = foo)

wilcox.test(affinity_scaled ~ sensitivity, data = foo)
wilcox.test(ligand_affinity_change ~ sensitivity, data = foo)

# Interface scores (subset `bar`).
wilcox.test(mcsm_na_scaled ~ sensitivity, data = bar)
wilcox.test(mcsm_na_affinity ~ sensitivity, data = bar)

wilcox.test(mcsm_ppi2_scaled ~ sensitivity, data = bar)
wilcox.test(mcsm_ppi2_affinity ~ sensitivity, data = bar)

# find the most "impactful" effect value
# Columns inspected below, named once instead of repeating the vector on
# every line.
aff_scaled_cols <- c(
  "affinity_scaled", "mmcsm_lig_scaled", "mcsm_ppi2_scaled", "mcsm_na_scaled"
)

# Only those columns of `a`.
a2 <- a[aff_scaled_cols]
a2

# Largest absolute value found anywhere in those columns.
biggest <- max(abs(a2))
biggest

# Logical matrix marking the cell(s) whose magnitude equals that maximum.
# (The earlier `== c(,biggest)` variant was removed: an empty first argument
# to c() is a run-time error and stopped the script.)
abs(a2) == biggest

#
# biggest = max(abs(a2[1,]))
#
# #hmm
# #which(abs(a2) == biggest)
# #names(a2)[apply(a2, 1:4, function(i) which(i == max()))]
#
# # get row max
# a2$row_maximum = apply(abs(a2[,-1]), 1, max)
#
# # get colname for abs(max_value)
# #https://stackoverflow.com/questions/36960010/get-column-name-that-matches-specific-row-value-in-dataframe
# #names(df)[which(df == 1, arr.ind=T)[, "col"]]
# # yayy
# names(a2)[which(abs(a2) == biggest, arr.ind=T)[, "col"]]
#
# #another:https://statisticsglobe.com/return-column-name-of-largest-value-for-each-row-in-r
# colnames(a2)[max.col(abs(a2), ties.method = "first")] # Apply colnames & max.col functions
# #################################################
# # use whole df
# #gene_aff_cols = c('affinity_scaled','mmcsm_lig_scaled','mcsm_ppi2_scaled','mcsm_na_scaled')
#
# biggest = max(abs(a[gene_aff_cols]))
# a$max_es = biggest
# a$effect = names(a[gene_aff_cols])[which(abs(a[gene_aff_cols]) == biggest, arr.ind=T)[, "col"]]
#
# effect_name = unique(a$effect)
# #get index of value of max effect
# ind = (which(abs(a[effect_name]) == biggest, arr.ind=T))
# a[effect_name][ind]
# # extract sign
# a$effect_sign = sign(a[effect_name][ind])
########################################################
# maxn <- function(n) function(x) order(x, decreasing = TRUE)[n]
# second_big = abs(a[gene_aff_cols])[maxn(2)(abs(a[gene_aff_cols])]
# apply(df, 1, function(x)x[maxn(1)(x)])
# apply(a[gene_aff_cols], 1, function(x) abs(a[gene_aff_cols])[maxn(2)(abs(a[gene_aff_cols]))])
#########################################################
# loop
# Annotate the rows at positions 167, 423 and 427 with the single
# largest-magnitude value found across `gene_aff_cols`, the column it came
# from, and its original sign. `gene_aff_cols` must already be defined
# (a character vector of column names); its columns are assumed numeric.
a2 <- df2[df2$position %in% c(167, 423, 427), ]

# Grouped view of the same rows. The original pipe was left dangling into the
# next assignment, which made the whole statement an invalid assignment
# target; it now ends after group_by().
test <- dplyr::group_by(a2, position)

# Magnitudes as a plain matrix so which(arr.ind = TRUE) gives row/col indices.
abs_effects <- abs(as.matrix(a2[gene_aff_cols]))
stopifnot(any(!is.na(abs_effects)))

# NA cells are ignored; otherwise one NA would make the maximum NA and leave
# nothing to match below.
biggest <- max(abs_effects, na.rm = TRUE)
a2$max_es <- biggest

# First matching cell in column-major order. Taking exactly one hit keeps the
# assignments below scalar even when several cells tie for the maximum.
ind <- which(abs_effects == biggest, arr.ind = TRUE)[1, ]
effect_name <- colnames(abs_effects)[ind[["col"]]]
a2$effect <- effect_name

# Signed value behind the maximum, shown for inspection.
a2[[effect_name]][ind[["row"]]]

# extract sign
a2$effect_dir <- sign(a2[[effect_name]][ind[["row"]]])
#################################
df2_short <- df2[df2$position %in% c(167, 423, 427), ]

# For every position in `df2_short`, find the largest-magnitude value across
# `gene_aff_cols` and record it, its source column, and its sign on that
# position's rows. The per-position results are kept in a list and bound
# together afterwards; previously each iteration overwrote `a2` and all but
# the last position's annotation was lost.
positions <- unique(df2_short$position)
per_position <- vector("list", length(positions))

for (k in seq_along(positions)) {
  i <- positions[k]
  a2 <- df2_short[df2_short$position == i, ]

  # Defaults used when the position has no non-missing effect value at all.
  a2$max_es <- NA_real_
  a2$effect <- NA_character_
  a2$effect_sign <- NA_real_

  abs_effects <- abs(as.matrix(a2[gene_aff_cols]))
  if (any(!is.na(abs_effects))) {
    biggest <- max(abs_effects, na.rm = TRUE)

    # First matching cell (column-major); a single hit keeps the assignments
    # scalar when several cells tie for the maximum.
    ind <- which(abs_effects == biggest, arr.ind = TRUE)[1, ]
    effect_name <- colnames(abs_effects)[ind[["col"]]]

    a2$max_es <- biggest
    a2$effect <- effect_name
    # extract sign
    a2$effect_sign <- sign(a2[[effect_name]][ind[["row"]]])
  }

  per_position[[k]] <- a2
}

# All positions with their annotations, one block of rows per position.
df2_short_effects <- do.call(rbind, per_position)

#========================
# Positions to annotate below. Earlier selections were each overwritten by
# the next assignment before being used, so only the last one ever took
# effect; they are kept here as comments for reference.
# df2_short <- df3[df3$position %in% c(167, 423, 427), ]
# df2_short <- df3[df3$position %in% c(170, 167, 493, 453, 435, 433, 480, 456, 445), ]
df2_short <- df3[df3$position %in% c(435, 480), ]

#' Select columns for the rows at one position
#'
#' @param x Position value to match against `df$position`.
#' @param y Column name(s) or index(es) to return.
#' @param df Data frame with a `position` column. Defaults to the global
#'   `df2_short`, looked up when the function is called.
#' @return `df[rows, y]` for the rows whose position equals `x`: a data frame
#'   when `y` selects several columns, a vector when it selects one (base
#'   data-frame subsetting rules).
give_col <- function(x, y, df = df2_short) {
  # which() skips rows whose position is NA; plain logical indexing would
  # return an all-NA row for each of them.
  df[which(df$position == x), y]
}

# For every position in `df2_short`, write three columns onto that position's
# rows:
#   abs_max_effect - largest magnitude found across `gene_aff_cols`
#   effect_type    - name of the column holding that value
#   effect_sign    - sign of the original (signed) value
# `gene_aff_cols` must already be defined; its columns are assumed numeric.
# Positions with no non-missing effect value keep NA in all three columns.
df2_short$abs_max_effect <- rep(NA_real_, nrow(df2_short))
df2_short$effect_type <- rep(NA_character_, nrow(df2_short))
df2_short$effect_sign <- rep(NA_real_, nrow(df2_short))

for (i in unique(df2_short$position)) {
  # Row indices for this position; which() ignores NA positions.
  at_pos <- which(df2_short$position == i)
  if (length(at_pos) == 0L) {
    next
  }

  # Magnitudes as a matrix so which(arr.ind = TRUE) gives row/col indices.
  abs_effects <- abs(as.matrix(df2_short[at_pos, gene_aff_cols, drop = FALSE]))
  if (all(is.na(abs_effects))) {
    next
  }

  biggest <- max(abs_effects, na.rm = TRUE)

  # First matching cell in column-major order. Using exactly one hit means
  # exact ties no longer produce several (suffixed) names or several signs
  # that then get recycled across the position's rows.
  hit <- which(abs_effects == biggest, arr.ind = TRUE)[1, ]
  effect_name <- colnames(abs_effects)[hit[["col"]]]

  # Map the hit's row within the subset back to a row of df2_short to read
  # the signed value.
  signed_value <- df2_short[[effect_name]][at_pos[hit[["row"]]]]

  df2_short[at_pos, "abs_max_effect"] <- biggest
  df2_short[at_pos, "effect_type"] <- effect_name
  df2_short[at_pos, "effect_sign"] <- sign(signed_value)
}

# No post-hoc removal of ".1"-style suffixes from effect_type is needed any
# more: the name is taken from the column names directly, so duplicates are
# never created.