# Exploratory checks of affinity-type scores against `sensitivity`, followed
# by scratch code that labels each position with its largest-magnitude effect
# (value, source column and sign).
#
# NOTE(review): this section relies on objects created outside the visible
# code: df3_affinity_filtered, a, df2, df3, gene_aff_cols and the %>% pipe.
# The only definition of gene_aff_cols in view is the commented-out one
# further down -- confirm it is assigned before this section is run.

# Rows whose ligand_distance / interface_dist is below 10.
foo <- df3_affinity_filtered[df3_affinity_filtered$ligand_distance < 10, ]
bar <- df3_affinity_filtered[df3_affinity_filtered$interface_dist < 10, ]

# wilcox.test() of each score against sensitivity (formula interface).
wilcox.test(foo$mmcsm_lig_scaled ~ foo$sensitivity)
wilcox.test(foo$mmcsm_lig ~ foo$sensitivity)
wilcox.test(foo$affinity_scaled ~ foo$sensitivity)
wilcox.test(foo$ligand_affinity_change ~ foo$sensitivity)

wilcox.test(bar$mcsm_na_scaled ~ bar$sensitivity)
wilcox.test(bar$mcsm_na_affinity ~ bar$sensitivity)
wilcox.test(bar$mcsm_ppi2_scaled ~ bar$sensitivity)
wilcox.test(bar$mcsm_ppi2_affinity ~ bar$sensitivity)

# find the most "impactful" effect value
biggest <- max(abs((a[c(
  "affinity_scaled", "mmcsm_lig_scaled", "mcsm_ppi2_scaled", "mcsm_na_scaled"
)])))
abs((a[c(
  "affinity_scaled", "mmcsm_lig_scaled", "mcsm_ppi2_scaled", "mcsm_na_scaled"
)])) == biggest
# NOTE: the next attempt is disabled because `c(, biggest)` has an empty first
# argument and fails at run time ("argument 1 is empty"), which would abort
# a source() of this script. The comparison just above is the working form.
# abs((a[c('affinity_scaled','mmcsm_lig_scaled','mcsm_ppi2_scaled','mcsm_na_scaled')]))==c(,biggest)
max(abs((a[c(
  "affinity_scaled", "mmcsm_lig_scaled", "mcsm_ppi2_scaled", "mcsm_na_scaled"
)])))

a2 <- (a[c(
  "affinity_scaled", "mmcsm_lig_scaled", "mcsm_ppi2_scaled", "mcsm_na_scaled"
)])
a2

#
# biggest = max(abs(a2[1,]))
#
# #hmm
# #which(abs(a2) == biggest)
# #names(a2)[apply(a2, 1:4, function(i) which(i == max()))]
#
# # get row max
# a2$row_maximum = apply(abs(a2[,-1]), 1, max)
#
# # get colname for abs(max_value)
# #https://stackoverflow.com/questions/36960010/get-column-name-that-matches-specific-row-value-in-dataframe
# #names(df)[which(df == 1, arr.ind=T)[, "col"]]
# # yayy
# names(a2)[which(abs(a2) == biggest, arr.ind=T)[, "col"]]
#
# #another:https://statisticsglobe.com/return-column-name-of-largest-value-for-each-row-in-r
# colnames(a2)[max.col(abs(a2), ties.method = "first")] # Apply colnames & max.col functions
#
#################################################
# # use whole df
# #gene_aff_cols = c('affinity_scaled','mmcsm_lig_scaled','mcsm_ppi2_scaled','mcsm_na_scaled')
#
# biggest = max(abs(a[gene_aff_cols]))
# a$max_es = biggest
# a$effect = names(a[gene_aff_cols])[which(abs(a[gene_aff_cols]) == biggest, arr.ind=T)[, "col"]]
#
# effect_name = unique(a$effect)
# #get index of value of max effect
# ind = (which(abs(a[effect_name]) == biggest, arr.ind=T))
# a[effect_name][ind]
# # extract sign
# a$effect_sign = sign(a[effect_name][ind])
########################################################
# maxn <- function(n) function(x) order(x, decreasing = TRUE)[n]
# second_big = abs(a[gene_aff_cols])[maxn(2)(abs(a[gene_aff_cols])]
# apply(df, 1, function(x)x[maxn(1)(x)])
# apply(a[gene_aff_cols], 1, function(x) abs(a[gene_aff_cols])[maxn(2)(abs(a[gene_aff_cols]))])
#########################################################
# loop
a2 <- df2[df2$position %in% c(167, 423, 427), ]

# The pipe originally ended in a dangling `%>%`, which swallowed the next
# statement (`biggest = ...`) and failed at run time. It is terminated here so
# that both assignments actually happen.
test <- a2 %>%
  dplyr::group_by(position)

# Largest absolute effect over ALL selected rows (not per position).
biggest <- max(abs(a2[gene_aff_cols]))
a2$max_es <- max(abs(a2[gene_aff_cols]))
a2$effect <- names(a2[gene_aff_cols])[
  which(abs(a2[gene_aff_cols]) == biggest, arr.ind = TRUE)[, "col"]
]

effect_name <- unique(a2$effect)
# get index of value of max effect
ind <- (which(abs(a2[effect_name]) == biggest, arr.ind = TRUE))
a2[effect_name][ind]
# extract sign
a2$effect_dir <- sign(a2[effect_name][ind])

#################################
df2_short <- df2[df2$position %in% c(167, 423, 427), ]

# Prototype of the per-position loop.
# NOTE: everything is written to the local copy `a2`, which is overwritten on
# each iteration, so only the last position's `a2` survives and `df2_short`
# is never modified. The loop further down is the version that stores results.
for (i in unique(df2_short$position)) {
  #print(i)
  #print(paste0("\nNo. of unique positions:", length(unique(df2$position))) )
  #cat(length(unique(df2$position)))
  a2 <- df2_short[df2_short$position == i, ]

  biggest <- max(abs(a2[gene_aff_cols]))
  a2$max_es <- max(abs(a2[gene_aff_cols]))
  a2$effect <- names(a2[gene_aff_cols])[
    which(abs(a2[gene_aff_cols]) == biggest, arr.ind = TRUE)[, "col"]
  ]

  effect_name <- unique(a2$effect)
  # get index of value of max effect
  ind <- (which(abs(a2[effect_name]) == biggest, arr.ind = TRUE))
  a2[effect_name][ind]
  # extract sign
  a2$effect_sign <- sign(a2[effect_name][ind])
}

#========================
# Position subsets tried so far; only the last assignment ever took effect,
# so the earlier alternatives are kept as comments.
# df2_short = df3[df3$position%in%c(167, 423, 427),]
# df2_short = df3[df3$position%in%c(170, 167, 493, 453, 435, 433, 480, 456, 445),]
df2_short <- df3[df3$position %in% c(435, 480), ]

# Return column(s) `y` of `df` for the rows whose `position` equals `x`.
#
# x:  position value to match against df$position.
# y:  column name(s) / index(es) passed to `[`.
# df: data frame to subset; defaults to the global `df2_short`, looked up at
#     call time.
give_col <- function(x, y, df = df2_short) {
  df[df$position == x, y]
}

# For every position, store on all of its rows:
#   abs_max_effect - largest absolute value found in the gene_aff_cols columns
#   effect_type    - name of the column holding that value
#   effect_sign    - sign() of the original (signed) value
# Ties are resolved by taking the FIRST match in column-major order (lowest
# column, then lowest row), so exactly one value is assigned per position.
# Previously every tied match was assigned, which either failed in the
# data-frame replacement or recycled arbitrary values across the rows.
for (i in unique(df2_short$position)) {
  pos_rows <- df2_short$position == i
  effects <- give_col(i, gene_aff_cols)
  abs_effects <- abs(effects)

  # No usable value for this position: record NA rather than letting max()
  # return NA and the lookups below come back empty.
  if (all(is.na(abs_effects))) {
    df2_short[pos_rows, "abs_max_effect"] <- NA_real_
    df2_short[pos_rows, "effect_type"] <- NA_character_
    df2_short[pos_rows, "effect_sign"] <- NA_real_
    next
  }

  biggest <- max(abs_effects, na.rm = TRUE)

  # Matrix of (row, col) indices of every cell equal to the maximum.
  hits <- which(abs_effects == biggest, arr.ind = TRUE)
  hit_row <- hits[1, "row"] # pick first one in case we have multiple exact values
  hit_col <- hits[1, "col"]
  effect_name <- names(effects)[hit_col]

  df2_short[pos_rows, "abs_max_effect"] <- biggest
  df2_short[pos_rows, "effect_type"] <- effect_name
  # Read the signed value back from the un-abs()'d subset to recover the sign.
  df2_short[pos_rows, "effect_sign"] <- sign(effects[[hit_col]][hit_row])
}

# cull duplicate effect types that happen when there are exact duplicate values
# (kept as a safeguard; the pattern is anchored so that only a trailing
# ".<digits>" de-duplication suffix is removed).
df2_short$effect_type <- sub("\\.[0-9]+$", "", df2_short$effect_type)