From 365c32295370237bfd34fe8770898d5f606db54b Mon Sep 17 00:00:00 2001 From: Tanushree Tunstall Date: Sat, 13 Aug 2022 14:54:51 +0100 Subject: [PATCH] added fd corrected p-values for ks stats --- config/embb.R | 8 +- scripts/ks_test_all_PS.R | 245 +++++++++++------ .../plotting/plotting_thesis/preformatting.R | 256 +++++++++++------- 3 files changed, 333 insertions(+), 176 deletions(-) diff --git a/config/embb.R b/config/embb.R index adfbd05..1309643 100644 --- a/config/embb.R +++ b/config/embb.R @@ -109,6 +109,10 @@ cat("\n===================================================" ) ############################################################## # var for position customisation for plots -aa_pos_lig1 = aa_pos_ca +# aa_pos_lig1 = aa_pos_ca +# aa_pos_lig2 = aa_pos_cdl +# aa_pos_lig3 = aa_pos_dsl + +aa_pos_lig1 = aa_pos_dsl aa_pos_lig2 = aa_pos_cdl -aa_pos_lig3 = aa_pos_dsl \ No newline at end of file +aa_pos_lig3 = aa_pos_ca \ No newline at end of file diff --git a/scripts/ks_test_all_PS.R b/scripts/ks_test_all_PS.R index 65a72c9..97e93d2 100755 --- a/scripts/ks_test_all_PS.R +++ b/scripts/ks_test_all_PS.R @@ -18,38 +18,26 @@ source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") # Output #============= outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") -outdir_stats = "~/git/LSHTM_analysis/scripts/plotting/plotting_thesis/stats/" +outdir_stats = paste0(outdir_images,"stats/") # ks test by lineage #ks_lineage = paste0(outdir, "/KS_lineage_all_muts.csv") ########################### # Data for stats -# you need merged_df2 or merged_df2_comp +# you need df2 or df2_comp # since this is one-many relationship -# i.e the same SNP can belong to multiple lineages -# using the _comp dataset means -# we lose some muts and at this level, we should use -# as much info as available, hence use df with NA ########################### # REASSIGNMENT -my_df = merged_df2 - -# delete variables not required -rm(merged_df2, merged_df2_comp, merged_df3, merged_df3_comp) -rm(merged_df2_lig, merged_df2_comp_lig, merged_df3_lig, merged_df3_comp_lig, my_df_u, my_df_u_lig) +df2 = merged_df2[, colnames(merged_df2)%in%plotting_cols] # quick checks -colnames(my_df) -str(my_df) +colnames(df2) +str(df2) -# Ensure correct data type in columns to plot: need to be factor -#is.factor(my_df$lineage) -#my_df$lineage = as.factor(my_df$lineage) -#is.factor(my_df$lineage) ######################################################################## -table(my_df$lineage); str(my_df$lineage) +table(df2$lineage); str(df2$lineage) # subset only lineages1-4 sel_lineages = c("L1" @@ -58,29 +46,22 @@ sel_lineages = c("L1" , "L4") # subset selected lineages -df_lin = subset(my_df, subset = lineage %in% sel_lineages) +df_lin = subset(df2, subset = lineage %in% sel_lineages) table(df_lin$lineage) table(df_lin$sensitivity) table(df_lin$lineage, df_lin$sensitivity) -#============== -# dr_muts_col -#============== -#table(df_lin$mutation_info); str(df_lin$mutation_info) +#ensure lineage is a factor +#str(df_lin$lineage); str(df_lin$lineage_labels) +#df_lin$lineage = as.factor(df_lin$lineage) +#df_lin$lineage_labels = as.factor(df_lin$lineage) +table(df_lin$lineage); table(df_lin$lineage_labels)] -# subset df with dr muts only -#df_lin_dr = subset(df_lin, mutation_info == dr_muts_col) -#table(df_lin_dr$mutation_info) - -#============== -# other_muts_col -#============== -#df_lin_other = subset(df_lin, mutation_info == other_muts_col) -#table(df_lin_other$mutation_info) - -#======================================================================= +#============================== +# Stats for average stability +#============================= # individual: CHECKS lin1 = df_lin[df_lin$lineage == "L1",]$avg_stability_scaled lin2 = df_lin[df_lin$lineage == "L2",]$avg_stability_scaled @@ -89,14 +70,14 @@ lin4 = df_lin[df_lin$lineage == "L4",]$avg_stability_scaled ks.test(lin1, lin4) - -ks.test(df_lin$avg_stability_scaled[df_lin$lineage == "L2"] - , df_lin$avg_stability_scaled[df_lin$lineage == "L3"]) +ks.test(df_lin$avg_stability_scaled[df_lin$lineage == "L1"] + , df_lin$avg_stability_scaled[df_lin$lineage == "L4"]) #======================================================================= my_lineages = levels(factor(df_lin$lineage)); my_lineages #======================================================================= # Loop +#0 : < 2.2e-16 #===================== # Lineage 1 comparisons #===================== @@ -131,7 +112,7 @@ for (i in my_lineages_comp_l1){ , df_lin$avg_stability_scaled[df_lin$lineage == i])$p.value # print(c(lineage_comp, ks_method, ks_statistic[[1]], ks_pval)) - l1_df$comparison = lineage_comp + l1_df$comparison = lineage_comp l1_df$method = ks_method l1_df$ks_statistic = ks_statistic[[1]] l1_df$ks_pvalue = ks_pvalue @@ -142,6 +123,33 @@ for (i in my_lineages_comp_l1){ ks_df_l1 = rbind(ks_df_l1,l1_df) } + +ks_df_l1 + +# adjusted p-value: bonferroni +ks_df_l1$p_adj_bonferroni = p.adjust(ks_df_l1$ks_pvalue, method = "bonferroni") +ks_df_l1$signif_bon = ks_df_l1$p_adj_bonferroni +ks_df_l1 = dplyr::mutate(ks_df_l1 + , signif_bon = case_when(signif_bon == 0.05 ~ "." + , signif_bon <=0.0001 ~ '****' + , signif_bon <=0.001 ~ '***' + , signif_bon <=0.01 ~ '**' + , signif_bon <0.05 ~ '*' + , TRUE ~ 'ns')) + +# adjusted p-value:fdr +ks_df_l1$p_adj_fdr = p.adjust(ks_df_l1$ks_pvalue, method = "fdr") +ks_df_l1$signif_fdr = ks_df_l1$p_adj_fdr +ks_df_l1 = dplyr::mutate(ks_df_l1 + , signif_fdr = case_when(signif_fdr == 0.05 ~ "." + , signif_fdr <=0.0001 ~ '****' + , signif_fdr <=0.001 ~ '***' + , signif_fdr <=0.01 ~ '**' + , signif_fdr <0.05 ~ '*' + , TRUE ~ 'ns')) + +ks_df_l1 + ##################################################################### #===================== @@ -190,6 +198,31 @@ for (i in my_lineages_comp_l2){ } + +# adjusted p-value: bonferroni +ks_df_l2$p_adj_bonferroni = p.adjust(ks_df_l2$ks_pvalue, method = "bonferroni") +ks_df_l2$signif_bon = ks_df_l2$p_adj_bonferroni +ks_df_l2 = dplyr::mutate(ks_df_l2 + , signif_bon = case_when(signif_bon == 0.05 ~ "." + , signif_bon <=0.0001 ~ '****' + , signif_bon <=0.001 ~ '***' + , signif_bon <=0.01 ~ '**' + , signif_bon <0.05 ~ '*' + , TRUE ~ 'ns')) + +# adjusted p-value:fdr +ks_df_l2$p_adj_fdr = p.adjust(ks_df_l2$ks_pvalue, method = "fdr") +ks_df_l2$signif_fdr = ks_df_l2$p_adj_fdr +ks_df_l2 = dplyr::mutate(ks_df_l2 + , signif_fdr = case_when(signif_fdr == 0.05 ~ "." + , signif_fdr <=0.0001 ~ '****' + , signif_fdr <=0.001 ~ '***' + , signif_fdr <=0.01 ~ '**' + , signif_fdr <0.05 ~ '*' + , TRUE ~ 'ns')) + +ks_df_l2 + #===================== # Lineage 3 comparisons #===================== @@ -234,11 +267,33 @@ for (i in my_lineages_comp_l3){ ks_df_l3 = rbind(ks_df_l3, l3_df) } -###################################################################################### +# adjusted p-value: bonferroni +ks_df_l3$p_adj_bonferroni = p.adjust(ks_df_l3$ks_pvalue, method = "bonferroni") +ks_df_l3$signif_bon = ks_df_l3$p_adj_bonferroni +ks_df_l3 = dplyr::mutate(ks_df_l3 + , signif_bon = case_when(signif_bon == 0.05 ~ "." + , signif_bon <=0.0001 ~ '****' + , signif_bon <=0.001 ~ '***' + , signif_bon <=0.01 ~ '**' + , signif_bon <0.05 ~ '*' + , TRUE ~ 'ns')) + +# adjusted p-value:fdr +ks_df_l3$p_adj_fdr = p.adjust(ks_df_l3$ks_pvalue, method = "fdr") +ks_df_l3$signif_fdr = ks_df_l3$p_adj_fdr +ks_df_l3 = dplyr::mutate(ks_df_l3 + , signif_fdr = case_when(signif_fdr == 0.05 ~ "." + , signif_fdr <=0.0001 ~ '****' + , signif_fdr <=0.001 ~ '***' + , signif_fdr <=0.01 ~ '**' + , signif_fdr <0.05 ~ '*' + , TRUE ~ 'ns')) + +ks_df_l3 #################################################################### -# combine all 4 ks_dfs +# combine all 3 ks_dfs n_dfs = 3 if ( all.equal(nrow(ks_df_l1), nrow(ks_df_l2), nrow(ks_df_l3)) && all.equal(ncol(ks_df_l1), ncol(ks_df_l2), ncol(ks_df_l3)) ){ @@ -263,12 +318,9 @@ if ( all.equal(nrow(ks_df_l1), nrow(ks_df_l2), nrow(ks_df_l3)) && , "\nCheck hardcoded value of n_dfs") } -#-------------- -# formatting -#-------------- -# add total_n number -#ks_df_combined$total_samples_analysed = nrow(df_lin) - +#---------------------------- +# ADD extra cols: formatting +#---------------------------- # adding pvalue_signif ks_df_combined$pvalue_signif = ks_df_combined$ks_pvalue str(ks_df_combined$pvalue_signif) @@ -322,9 +374,31 @@ overall_RS_df$ks_pvalue = ks_pvalue overall_RS_df$n_samples = n_samples_all overall_RS_df$n_samples_total= n_samples_total -#-------------- -# formatting -#-------------- +#---------------------------- +# ADD extra cols +#---------------------------- +# adjusted p-value: bonferroni +overall_RS_df$p_adj_bonferroni = p.adjust(overall_RS_df$ks_pvalue, method = "bonferroni") +overall_RS_df$signif_bon = overall_RS_df$p_adj_bonferroni +overall_RS_df = dplyr::mutate(overall_RS_df + , signif_bon = case_when(signif_bon == 0.05 ~ "." + , signif_bon <=0.0001 ~ '****' + , signif_bon <=0.001 ~ '***' + , signif_bon <=0.01 ~ '**' + , signif_bon <0.05 ~ '*' + , TRUE ~ 'ns')) + +# adjusted p-value:fdr +overall_RS_df$p_adj_fdr = p.adjust(overall_RS_df$ks_pvalue, method = "fdr") +overall_RS_df$signif_fdr = overall_RS_df$p_adj_fdr +overall_RS_df = dplyr::mutate(overall_RS_df + , signif_fdr = case_when(signif_fdr == 0.05 ~ "." + , signif_fdr <=0.0001 ~ '****' + , signif_fdr <=0.001 ~ '***' + , signif_fdr <=0.01 ~ '**' + , signif_fdr <0.05 ~ '*' + , TRUE ~ 'ns')) +# unadjusted p-values overall_RS_df$pvalue_signif = overall_RS_df$ks_pvalue overall_RS_df = dplyr::mutate(overall_RS_df , pvalue_signif = case_when(pvalue_signif == 0.05 ~ "." @@ -333,8 +407,6 @@ overall_RS_df = dplyr::mutate(overall_RS_df , pvalue_signif <=0.01 ~ '**' , pvalue_signif <0.05 ~ '*' , TRUE ~ 'ns')) - -overall_RS_df ##################################################################### if (all(colnames(ks_df_combined_f) == colnames(overall_RS_df))){ @@ -349,20 +421,12 @@ ks_df_combined_f2 = rbind(ks_df_combined_f, overall_RS_df) ks_df_combined_f2$ks_comp_type = "between_lineages" ks_df_combined_f2$gene_name = tolower(gene) -# #============================== -# # write output file: KS test -# #=============================== -# Out_lineage_bwL = paste0(outdir_stats -# , tolower(gene) -# , "_ks_lineage_bw.csv") -# -# cat("Output of KS test bt lineage:", Out_lineage_bwL) -# write.csv(ks_df_combined_f2, Out_lineage_bwL, row.names = FALSE) +ks_df_combined_f2 ########################################################################### -#======================= +#================================= # Within lineage R vs S: MANUAL -#======================= +#================================= lin1 = df_lin[df_lin$lineage == my_lin1,] ks.test(lin1$avg_stability_scaled[lin1$sensitivity == "R"] , lin1$avg_stability_scaled[lin1$sensitivity == "S"]) @@ -426,35 +490,49 @@ for (i in c(my_lin1 } all_within_lin_df -all_within_lin_df$pvalue_signif = all_within_lin_df$ks_pvalue -str(all_within_lin_df$pvalue_signif) - +#---------------------------- +# ADD extra cols: formatting +#---------------------------- +# adjusted p-value: bonferroni +all_within_lin_df$p_adj_bonferroni = p.adjust(all_within_lin_df$ks_pvalue, method = "bonferroni") +all_within_lin_df$signif_bon = all_within_lin_df$p_adj_bonferroni all_within_lin_df = dplyr::mutate(all_within_lin_df - , pvalue_signif = case_when(pvalue_signif == 0.05 ~ "." - , pvalue_signif <=0.0001 ~ '****' - , pvalue_signif <=0.001 ~ '***' - , pvalue_signif <=0.01 ~ '**' - , pvalue_signif <0.05 ~ '*' + , signif_bon = case_when(signif_bon == 0.05 ~ "." + , signif_bon <=0.0001 ~ '****' + , signif_bon <=0.001 ~ '***' + , signif_bon <=0.01 ~ '**' + , signif_bon <0.05 ~ '*' , TRUE ~ 'ns')) -all_within_lin_df +# adjusted p-value:fdr +all_within_lin_df$p_adj_fdr = p.adjust(all_within_lin_df$ks_pvalue, method = "fdr") +all_within_lin_df$signif_fdr = all_within_lin_df$p_adj_fdr +all_within_lin_df = dplyr::mutate(all_within_lin_df + , signif_fdr = case_when(signif_fdr == 0.05 ~ "." + , signif_fdr <=0.0001 ~ '****' + , signif_fdr <=0.001 ~ '***' + , signif_fdr <=0.01 ~ '**' + , signif_fdr <0.05 ~ '*' + , TRUE ~ 'ns')) -# ADD extra cols +# unadjusted p-value +all_within_lin_df$pvalue_signif = all_within_lin_df$ks_pvalue +str(all_within_lin_df$pvalue_signif) +all_within_lin_df = dplyr::mutate(all_within_lin_df + , pvalue_signif = case_when(pvalue_signif == 0.05 ~ "." + , pvalue_signif <=0.0001 ~ '****' + , pvalue_signif <=0.001 ~ '***' + , pvalue_signif <=0.01 ~ '**' + , pvalue_signif <0.05 ~ '*' + , TRUE ~ 'ns')) + +# ADD info cols all_within_lin_df$ks_comp_type = "within_lineages" all_within_lin_df$gene_name = tolower(gene) -# #-------------------- -# # write output file: KS test within grpup -# #---------------------- -# Out_ks_withinL = paste0(outdir_stats -# , tolower(gene) -# , "_ks_lineage_within.csv") -# cat("Output of KS test within lineage:",Out_ks_withinL ) -# write.csv(all_within_lin_df, Out_ks_withinL, row.names = FALSE) - ################################################################## -if (all(colnames(ks_df_combined_f2) == colnames(Out_ks_withinL))){ +if (all(colnames(ks_df_combined_f2) == colnames(all_within_lin_df))){ cat("\nPASS:combining KS test results") }else{ @@ -469,5 +547,6 @@ ks_df_combined_all = rbind(ks_df_combined_f2, all_within_lin_df) Out_ks_all = paste0(outdir_stats , tolower(gene) , "_ks_lineage_all_comp.csv") + cat("Output of KS test all comparisons:", Out_ks_all ) -write.csv(ks_df_combined_all, Out_ks_all, row.names = FALSE) \ No newline at end of file +write.csv(ks_df_combined_all, Out_ks_all, row.names = FALSE) diff --git a/scripts/plotting/plotting_thesis/preformatting.R b/scripts/plotting/plotting_thesis/preformatting.R index 5e85663..aeb45f4 100644 --- a/scripts/plotting/plotting_thesis/preformatting.R +++ b/scripts/plotting/plotting_thesis/preformatting.R @@ -21,116 +21,190 @@ geneL_normal = c("pnca") geneL_na = c("gid", "rpob") geneL_ppi2 = c("alr", "embb", "katg", "rpob") -# LigDist_colname # from globals used -# ppi2Dist_colname #from globals used -# naDist_colname #from globals used -common_cols = c("mutationinformation" - , "X5uhc_position" - , "X5uhc_offset" - , "position" - , "dst_mode" - , "mutation_info_labels" - , "sensitivity", dist_columns ) +# counting +foo = merged_df3[, c("mutationinformation" + , "wild_pos" + , "position" + , "sensitivity" + , "avg_lig_affinity" + , "avg_lig_affinity_scaled" + , "avg_lig_affinity_outcome" + , "ligand_distance" + , "ligand_affinity_change" + , "affinity_scaled" + , "ligand_outcome" + , "consurf_colour_rev" + , "consurf_outcome")] -#=================== -# stability cols -#=================== -raw_cols_stability = c("duet_stability_change" - , "deepddg" - , "ddg_dynamut2" - , "ddg_foldx" - , "avg_stability") +table(foo$consurf_outcome) -scaled_cols_stability = c("duet_scaled" - , "deepddg_scaled" - , "ddg_dynamut2_scaled" - , "foldx_scaled" - , "foldx_scaled_signC" # needed to get avg stability - , "avg_stability_scaled") +foo2 = foo[foo$ligand_distance<10,] -outcome_cols_stability = c("duet_outcome" - , "deepddg_outcome" - , "ddg_dynamut2_outcome" - , "foldx_outcome" - , "avg_stability_outcome") +table(foo2$ligand_outcome) -all_stability_cols = c(raw_cols_stability - , scaled_cols_stability - , outcome_cols_stability) -#=================== -# affinity cols -#=================== -raw_cols_affinity = c("ligand_affinity_change" - , "mmcsm_lig" - , "mcsm_ppi2_affinity" - , "mcsm_na_affinity" - , "avg_lig_affinity") +############################# +# wide plots SNP +# DRUG +length(aa_pos_drug); aa_pos_drug +drug = foo[foo$position%in%aa_pos_drug,] +drug$wild_pos -scaled_cols_affinity = c("affinity_scaled" - , "mmcsm_lig_scaled" - , "mcsm_ppi2_scaled" - , "mcsm_na_scaled" - , "avg_lig_affinity_scaled") +length(unique(drug$position)); unique(drug$position) +table(drug$position) -outcome_cols_affinity = c( "ligand_outcome" - , "mmcsm_lig_outcome" - , "mcsm_ppi2_outcome" - , "mcsm_na_outcome" - , "avg_lig_affinity_outcome") +drug$mutationinformation[drug$position==306] +drug$mutationinformation[drug$position==303] -all_affinity_cols = c(raw_cols_affinity - , scaled_cols_affinity - , outcome_cols_affinity) -#=================== -# conservation cols -#=================== -raw_cols_conservation = c("consurf_score" - , "snap2_score" - , "provean_score") +#CA +length(aa_pos_ca); aa_pos_ca +ca = foo[foo$position%in%aa_pos_ca,] +ca$position; length(unique(ca$position)) +table(ca$position) -scaled_cols_conservation = c("consurf_scaled" - , "snap2_scaled" - , "provean_scaled") +# DSL +length(aa_pos_dsl); aa_pos_dsl +dsl= foo[foo$position%in%aa_pos_dsl,] +dsl$position; length(unique(dsl$position)) +table(dsl$position) -outcome_cols_conservation = c("provean_outcome" - , "snap2_outcome" - , "consurf_colour_rev" - , "consurf_outcome") - -all_conserv_cols = c(raw_cols_conservation - , scaled_cols_conservation - , outcome_cols_conservation) +dsl$mutationinformation[dsl$position==330] +dsl$mutationinformation[dsl$position==438] +dsl$mutationinformation[dsl$position==439] +dsl$mutationinformation[dsl$position==510] -######################################## -categ_cols_to_factor = grep( "_outcome|_info", colnames(merged_df3) ) -fact_cols = colnames(merged_df3)[categ_cols_to_factor] +# CDL +length(aa_pos_cdl); aa_pos_cdl +cdl= foo[foo$position%in%aa_pos_cdl,] +length(unique(cdl$position)); cdl$position; +table(cdl$position) -if (any(lapply(merged_df3[, fact_cols], class) == "character")){ - cat("\nChanging", length(categ_cols_to_factor), "cols to factor") - merged_df3[, fact_cols] <- lapply(merged_df3[, fact_cols], as.factor) - if (all(lapply(merged_df3[, fact_cols], class) == "factor")){ - cat("\nSuccessful: cols changed to factor") - } +cdl$mutationinformation[cdl$position==456] +cdl$mutationinformation[cdl$position==521] +cdl$mutationinformation[cdl$position==554] +cdl$mutationinformation[cdl$position==568] +cdl$mutationinformation[cdl$position==575] +cdl$mutationinformation[cdl$position==580] +cdl$mutationinformation[cdl$position==658] +cdl$mutationinformation[cdl$position==665] + +############################################### +# OR plot + +bar = merged_df3[, c("mutationinformation" + , "wild_pos" + , "position" + , "sensitivity" + , affinity_dist_colnames + , "or_mychisq" + , "pval_fisher" + #, "pval_chisq" + , "neglog_pval_fisher" + , "log10_or_mychisq")] + +# bar$p_adj_bonferroni = p.adjust(bar$pval_fisher, method = "bonferroni") +# bar$signif_bon = bar$p_adj_bonferroni +# bar = dplyr::mutate(bar +# , signif_bon = case_when(signif_bon == 0.05 ~ "." +# , signif_bon <=0.0001 ~ '****' +# , signif_bon <=0.001 ~ '***' +# , signif_bon <=0.01 ~ '**' +# , signif_bon <0.05 ~ '*' +# , TRUE ~ 'ns')) + +bar$p_adj_fdr = p.adjust(bar$pval_fisher, method = "BH") +bar$signif_fdr = bar$p_adj_fdr +bar = dplyr::mutate(bar + , signif_fdr = case_when(signif_fdr == 0.05 ~ "." + , signif_fdr <=0.0001 ~ '****' + , signif_fdr <=0.001 ~ '***' + , signif_fdr <=0.01 ~ '**' + , signif_bon <0.05 ~ '*' + , TRUE ~ 'ns')) + +# sort df +bar = bar[order(bar$or_mychisq, decreasing = T), ] +bar = bar[, c("mutationinformation" + , "wild_pos" + , "position" + , "sensitivity" + , affinity_dist_colnames + , "or_mychisq" + #, "pval_fisher" + #, "pval_chisq" + #, "neglog_pval_fisher" + #, "log10_or_mychisq" + #, "signif_bon" + , "p_adj_fdr" + , "signif_fdr")] + +table(bar$sensitivity) + +table(bar$or_mychisq>1&bar$signif_fdr) # sen and res ~ OR + +str(bar) +sen = bar[bar$or_mychisq<1,] +sen = na.omit(sen) + +res = bar[bar$or_mychisq>1,] +res = na.omit(res) + +# comp +bar_or = bar[!is.na(bar$or_mychisq),] +table(bar_or$sensitivity) + +sen1 = bar_or[bar_or$or_mychisq<1,] # sen and res ~OR +res1 = bar_or[bar_or$or_mychisq>1,] # sen and res ~OR + +# sanity check +if (nrow(bar_or) == nrow(sen1) + nrow(res1) ){ + cat("\nPASS: df with or successfully sourced" + , "\nCalculating % of muts with OR>1") }else{ - cat("\nRequested cols aready factors") + stop("Abort: df with or numbers mimatch") } -cat("\ncols changed to factor are:\n", colnames(merged_df3)[categ_cols_to_factor] ) +# percent for OR muts +pc_orR = nrow(res1)/(nrow(sen1) + nrow(res1)); pc_orR +cat("\nPercentage of muts with OR>1 i.e resistant:" + , pc_orR *100 ) -#################################### -# merged_df3: NECESSARY pre-processing -################################### -#df3 = merged_df3 -plot_cols = c("mutationinformation", "mutation_info_labels", "position", "dst_mode" - , all_cols) +# muts with highest OR +head(bar_or$mutationinformation, 10) + +# sort df +bar_or = bar_or[order(bar_or$or_mychisq + , bar_or$ligand_distance + , bar_or$interface_dist + , decreasing = T), ] + +bar_or$drug_site = ifelse(bar_or$position%in%aa_pos_drug, "drug", "no") +table(bar_or$drug_site) + +bar_or$dsl_site = ifelse(bar_or$position%in%aa_pos_dsl, "dsl", "no") +table(bar_or$dsl_site) + +bar_or$ca_site = ifelse(bar_or$position%in%aa_pos_ca, "ca", "no") +table(bar_or$ca_site) + +bar_or$cdl_site = ifelse(bar_or$position%in%aa_pos_cdl, "cdl", "no") +table(bar_or$cdl_site) +top10_or = bar_or[1:10,] + +# are these active sites +top10_or$position[top10_or$position%in%active_aa_pos] -all_cols = c(common_cols - , all_stability_cols - , all_affinity_cols - , all_conserv_cols) +# clostest most sig +bar_or_lig = bar_or[bar_or$ligand_distance<10,] +bar_or_lig = bar_or_lig[order(bar_or_lig$ligand_distance, -bar_or_lig$or_mychisq), ] +table(bar_or_lig$signif_fdr) + + +bar_or_ppi = bar_or[bar_or$interface_dist<10,] +bar_or_ppi = bar_or_ppi[order(bar_or_ppi$interface_dist, -bar_or_ppi$or_mychisq), ] +table(bar_or_ppi$signif_fdr)