#!/usr/bin/env Rscript #source("~/git/LSHTM_analysis/config/pnca.R") #source("~/git/LSHTM_analysis/config/embb.R") source("~/git/LSHTM_analysis/config/gid.R") #source("~/git/LSHTM_analysis/config/alr.R") #source("~/git/LSHTM_analysis/config/katg.R") #source("~/git/LSHTM_analysis/config/rpob.R") # get plottting dfs source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") source("~/git/LSHTM_analysis/scripts/plotting/plotting_colnames.R") #======= # output #======= outdir_images = paste0("~/git/Writing/thesis/images/results/", tolower(gene), "/") ################################################################### # FIXME: ADD distance to NA when SP replies geneL_normal = c("pnca") geneL_na = c("gid", "rpob") geneL_ppi2 = c("alr", "embb", "katg", "rpob") # LigDist_colname # from globals used # ppi2Dist_colname #from globals used # naDist_colname #from globals used common_cols = c("mutationinformation" , "X5uhc_position" , "X5uhc_offset" , "position" , "dst_mode" , "mutation_info_labels" , "sensitivity", dist_columns ) ######################################## categ_cols_to_factor = grep( "_outcome|_info", colnames(merged_df3) ) fact_cols = colnames(merged_df3)[categ_cols_to_factor] if (any(lapply(merged_df3[, fact_cols], class) == "character")){ cat("\nChanging", length(categ_cols_to_factor), "cols to factor") merged_df3[, fact_cols] <- lapply(merged_df3[, fact_cols], as.factor) if (all(lapply(merged_df3[, fact_cols], class) == "factor")){ cat("\nSuccessful: cols changed to factor") } }else{ cat("\nRequested cols aready factors") } cat("\ncols changed to factor are:\n", colnames(merged_df3)[categ_cols_to_factor] ) #################################### # merged_df3: NECESSARY pre-processing ################################### #df3 = merged_df3 plot_cols = c("mutationinformation", "mutation_info_labels", "position", "dst_mode" , all_cols) all_cols = c(common_cols , all_stability_cols , all_affinity_cols , all_conserv_cols) # counting foo = merged_df3[, c("mutationinformation" , "wild_pos" , "position" , "sensitivity" , "avg_lig_affinity" , "avg_lig_affinity_scaled" , "avg_lig_affinity_outcome" , "ligand_distance" , "ligand_affinity_change" , "affinity_scaled" , "ligand_outcome" , "consurf_colour_rev" , "consurf_outcome")] table(foo$consurf_outcome) foo2 = foo[foo$ligand_distance<10,] table(foo2$ligand_outcome) ############################# # wide plots SNP # DRUG length(aa_pos_drug); aa_pos_drug drug = foo[foo$position%in%aa_pos_drug,] drug$wild_pos length(unique(drug$position)); unique(drug$position) table(drug$position) drug$mutationinformation[drug$position==306] drug$mutationinformation[drug$position==303] #CA length(aa_pos_ca); aa_pos_ca ca = foo[foo$position%in%aa_pos_ca,] ca$position; length(unique(ca$position)) table(ca$position) # DSL length(aa_pos_dsl); aa_pos_dsl dsl= foo[foo$position%in%aa_pos_dsl,] dsl$position; length(unique(dsl$position)) table(dsl$position) dsl$mutationinformation[dsl$position==330] dsl$mutationinformation[dsl$position==438] dsl$mutationinformation[dsl$position==439] dsl$mutationinformation[dsl$position==510] # CDL length(aa_pos_cdl); aa_pos_cdl cdl= foo[foo$position%in%aa_pos_cdl,] length(unique(cdl$position)); cdl$position; table(cdl$position) cdl$mutationinformation[cdl$position==456] cdl$mutationinformation[cdl$position==521] cdl$mutationinformation[cdl$position==554] cdl$mutationinformation[cdl$position==568] cdl$mutationinformation[cdl$position==575] cdl$mutationinformation[cdl$position==580] cdl$mutationinformation[cdl$position==658] cdl$mutationinformation[cdl$position==665] ############################################### # OR plot bar = merged_df3[, c("mutationinformation" , "wild_pos" , "position" , "sensitivity" , affinity_dist_colnames , "or_mychisq" , "pval_fisher" #, "pval_chisq" , "neglog_pval_fisher" , "log10_or_mychisq")] # bar$p_adj_bonferroni = p.adjust(bar$pval_fisher, method = "bonferroni") # bar$signif_bon = bar$p_adj_bonferroni # bar = dplyr::mutate(bar # , signif_bon = case_when(signif_bon == 0.05 ~ "." # , signif_bon <=0.0001 ~ '****' # , signif_bon <=0.001 ~ '***' # , signif_bon <=0.01 ~ '**' # , signif_bon <0.05 ~ '*' # , TRUE ~ 'ns')) bar$p_adj_fdr = p.adjust(bar$pval_fisher, method = "BH") bar$signif_fdr = bar$p_adj_fdr bar = dplyr::mutate(bar , signif_fdr = case_when(signif_fdr == 0.05 ~ "." , signif_fdr <=0.0001 ~ '****' , signif_fdr <=0.001 ~ '***' , signif_fdr <=0.01 ~ '**' , signif_bon <0.05 ~ '*' , TRUE ~ 'ns')) # sort df bar = bar[order(bar$or_mychisq, decreasing = T), ] bar = bar[, c("mutationinformation" , "wild_pos" , "position" , "sensitivity" , affinity_dist_colnames , "or_mychisq" #, "pval_fisher" #, "pval_chisq" #, "neglog_pval_fisher" #, "log10_or_mychisq" #, "signif_bon" , "p_adj_fdr" , "signif_fdr")] table(bar$sensitivity) table(bar$or_mychisq>1&bar$signif_fdr) # sen and res ~ OR str(bar) sen = bar[bar$or_mychisq<1,] sen = na.omit(sen) res = bar[bar$or_mychisq>1,] res = na.omit(res) # comp bar_or = bar[!is.na(bar$or_mychisq),] table(bar_or$sensitivity) sen1 = bar_or[bar_or$or_mychisq<1,] # sen and res ~OR res1 = bar_or[bar_or$or_mychisq>1,] # sen and res ~OR # sanity check if (nrow(bar_or) == nrow(sen1) + nrow(res1) ){ cat("\nPASS: df with or successfully sourced" , "\nCalculating % of muts with OR>1") }else{ stop("Abort: df with or numbers mimatch") } # percent for OR muts pc_orR = nrow(res1)/(nrow(sen1) + nrow(res1)); pc_orR cat("\nPercentage of muts with OR>1 i.e resistant:" , pc_orR *100 ) # muts with highest OR head(bar_or$mutationinformation, 10) # sort df bar_or = bar_or[order(bar_or$or_mychisq , bar_or$ligand_distance , bar_or$interface_dist , decreasing = T), ] bar_or$drug_site = ifelse(bar_or$position%in%aa_pos_drug, "drug", "no") table(bar_or$drug_site) bar_or$dsl_site = ifelse(bar_or$position%in%aa_pos_dsl, "dsl", "no") table(bar_or$dsl_site) bar_or$ca_site = ifelse(bar_or$position%in%aa_pos_ca, "ca", "no") table(bar_or$ca_site) bar_or$cdl_site = ifelse(bar_or$position%in%aa_pos_cdl, "cdl", "no") table(bar_or$cdl_site) top10_or = bar_or[1:10,] # are these active sites top10_or$position[top10_or$position%in%active_aa_pos] # clostest most sig bar_or_lig = bar_or[bar_or$ligand_distance<10,] bar_or_lig = bar_or_lig[order(bar_or_lig$ligand_distance, -bar_or_lig$or_mychisq), ] table(bar_or_lig$signif_fdr) bar_or_ppi = bar_or[bar_or$interface_dist<10,] bar_or_ppi = bar_or_ppi[order(bar_or_ppi$interface_dist, -bar_or_ppi$or_mychisq), ] table(bar_or_ppi$signif_fdr)