#============= # Data: Input #============== source("~/git/LSHTM_analysis/config/embb.R") source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") # Now we need to make a column that fill na in dst with value of dst_mode df2 = merged_df2 df2$dst2 = ifelse(is.na(df2$dst), df2$dst_mode, df2$dst) df2$sens2 = ifelse(df2$dst2 == 1, "R", "S") table(df2$sens2) all_snps = unique(df2$mutationinformation) all_snps_n = length(all_snps); all_snps_n all_samples_id = unique(df2$id) # different to nrows all_samples_id_n = length(all_samples_id); all_samples_id_n # different to nrows sel_lineage = c("L1", "L2", "L3", "L4") df2_lin = df2[df2$lineage%in%sel_lineage,] sel_lin_snps = unique(df2_lin$mutationinformation) sel_lin_snps_n = length(sel_lin_snps); sel_lin_snps_n sel_lin_samples_id = unique(df2_lin$id) sel_lin_samples_id_n = length(sel_lin_samples_id);sel_lin_samples_id_n # are the snps that are not in L1-L4 unique to L5-L7 left_snps = all_snps[!all_snps%in%sel_lin_snps] left_snps_n = length(left_snps); left_snps_n if (all_snps_n == sel_lin_snps_n+left_snps_n){ cat("PASS: left snps extracted for gene", tolower(gene)) }else{ stop("Abort: left snps count mismatch") } left_snps lef_snps_df = df2[df2$mutationinformation%in%left_snps,] table(lef_snps_df$lineage) ################################## # selected lineage plots ################################## #----------------------------------------------- # step 0: Select muts for each target #----------------------------------------------- # embb #sel_mutsP = c("D354N", "Y319D", "Y319D", "A962P", "S651N", "A201S") # gid #sel_mutsP = c("") # katg #sel_mutsP = c("") # rpob #sel_mutsP = c("") # selected lineage plos #----------------------------------------------- # step 1: Subset a smaller df #----------------------------------------------- cols_to_subset = c("mutationinformation" , "lineage" , "dst2" , "sens2") plot_df_gene = df2_lin[, cols_to_subset] #----------------------------------------------- # step 2: Subset data with just those genes #----------------------------------------------- plot_df_gene = plot_df_gene[plot_df_gene$mutationinformation%in%sel_mutsP,] cat("\nnrow of plot_df:", nrow(plot_df_gene)) #----------------------------------------------- # step 3: Assign to plot_df #----------------------------------------------- plot_df = plot_df_gene #----------------------------------------------- # step 4: Add p-value #----------------------------------------------- plot_df$pval = NULL for (i in sel_mutsP) { #print (i) s_mut = plot_df[plot_df$mutationinformation == i,] #print(s_mut) s_tab = table(s_mut$lineage, s_mut$sens2) #print(s_tab) #ft_pvalue_i = round(fisher.test(s_tab)$p.value, 3) ft_pvalue_i = fisher.test(s_tab #, workspace=2e9 #, simulate.p.value=TRUE,B=1e7 )$p.value #print(ft_pvalue_i) plot_df$pval[plot_df$mutationinformation == i] <- ft_pvalue_i #print(s_tab) } plot_df$pvalR = round(plot_df$pval, 3) # round P-values plot_df$pvalRF = plot_df$pvalR plot_df = dplyr::mutate(plot_df , pvalRF = case_when(pvalRF == 0.05 ~ "P ." , pvalRF <=0.0001 ~ 'P ****' , pvalRF <=0.001 ~ 'P ***' , pvalRF <=0.01 ~ 'P **' , pvalRF <0.05 ~ 'P *' , TRUE ~ 'ns')) plot_df head(plot_df) table(plot_df$pvalR<0.05) #----------------------------------------------- # step 5: Plot #----------------------------------------------- p_title = gene ts = 8 gls = 3 DSplot = ggplot(plot_df, aes(x = lineage, fill = sens2)) + geom_bar(stat = 'count') + scale_fill_manual(name = "" # name = leg_title , values = c("red", "blue") #, labels = levels(sens2)) )+ facet_wrap(~mutationinformation , scales = 'free_y' #, ncol = 3 , nrow = 1 ) + theme(legend.position = "top" , plot.title = element_text(hjust = 0.5, size=15,face = "italic") #, plot.title = element_blank() , strip.text = element_text(size=ts+2) , axis.text.x = element_text(size=ts) , axis.text.y = element_text(size=ts) , axis.title.y = element_text(size=ts) , legend.title = element_blank() , axis.title.x = element_blank() )+ labs(title = paste0(p_title #, ": sensitivity by lineage" ), y = 'Sample Count') #+ #geom_text(aes(label = pvalRF, x = 2.5, y = ypos_label+0.75)) # geom_blank(aes(y = ypos_label+1.25)) + # geom_label(aes(label = pvalRF, x = 2.5, ypos_label+0.75), fill="white", size =gls) #======== # Outplot #======== outdir_lin = "/home/pub/Work/LSHTM/Thesis_Plots/" png(paste0(outdir_lin, tolower(gene), "_linDS_selected.png") , width = 4 , height = 4, units = "in", res = 300 ) DSplot dev.off()