#=======================================================================
# Lineage vs drug-sensitivity bar plots for a hand-picked set of
# mutations.
#
# The script:
#   1. loads the gene config and the plotting data frames,
#   2. derives a combined DST column (dst2) and an R/S label (sens2),
#   3. reports which SNPs occur only outside lineages L1-L4,
#   4. subsets to the selected mutations within L1-L4,
#   5. draws one stacked bar panel per mutation and writes it to a PNG.
#
# NOTE(review): `merged_df2`, `gene` and the ggplot2 functions are not
# defined in this file; they are presumably provided by the two sourced
# scripts below -- confirm.
#=======================================================================

#=============
# Data: Input
#==============
source("~/git/LSHTM_analysis/config/katg.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")

# Build dst2: use dst where it is available, otherwise fall back to
# dst_mode for rows where dst is NA.
df2 <- merged_df2
#table(df2$dst2)
df2$dst2 <- ifelse(is.na(df2$dst), df2$dst_mode, df2$dst)

# Label: dst2 == 1 -> "R", any other non-NA value -> "S"
# (rows where dst2 is still NA stay NA).
df2$sens2 <- ifelse(df2$dst2 == 1, "R", "S")
table(df2$sens2)

# Distinct SNPs and distinct sample ids in the full data
all_snps <- unique(df2$mutationinformation)
all_snps_n <- length(all_snps)
all_snps_n

all_samples_id <- unique(df2$id) # different to nrows
all_samples_id_n <- length(all_samples_id)
all_samples_id_n # different to nrows

# Same counts restricted to the selected lineages
sel_lineage <- c("L1", "L2", "L3", "L4")
df2_lin <- df2[df2$lineage %in% sel_lineage, ]

sel_lin_snps <- unique(df2_lin$mutationinformation)
sel_lin_snps_n <- length(sel_lin_snps)
sel_lin_snps_n

sel_lin_samples_id <- unique(df2_lin$id)
sel_lin_samples_id_n <- length(sel_lin_samples_id)
sel_lin_samples_id_n

# SNPs that never occur in L1-L4: which lineages do they come from?
left_snps <- all_snps[!all_snps %in% sel_lin_snps]
left_snps_n <- length(left_snps)
left_snps_n

# Sanity check: the L1-L4 SNPs and the left-over SNPs must partition
# the full SNP set.
if (all_snps_n == sel_lin_snps_n + left_snps_n) {
  cat("PASS: left snps extracted for gene", tolower(gene))
} else {
  stop("Abort: left snps count mismatch")
}

left_snps
lef_snps_df <- df2[df2$mutationinformation %in% left_snps, ]
table(lef_snps_df$lineage)

##################################
#-----------------------------------------------
# step 0: Select muts for each target
#-----------------------------------------------
# embb
#sel_mutsP = c("D354N", "Y319D", "Y319D", "A962P", "S651N", "A201S")

# gid
# sel_mutsP = c("P75R", "A19G", "A133P", "R154W", "R118L") #G30D)

# katg
sel_mutsP <- c(
  "M257V", "G490S", "H116F", "H97N",
  "R249C", "W300R", "V320L", "S383A"
) #R463L

# rpob
#sel_mutsP = c("")

# columns needed for the selected-lineage plots
cols_to_subset <- c(
  "mutationinformation",
  "lineage",
  "dst2",
  "sens2"
)

#-----------------------------------------------
# step 1: Subset a smaller df
#-----------------------------------------------
plot_df_gene <- df2_lin[, cols_to_subset]

#-----------------------------------------------
# step 2: Subset data with just the selected mutations
#-----------------------------------------------
plot_df_gene <- plot_df_gene[plot_df_gene$mutationinformation %in% sel_mutsP, ]
cat("\nnrow of plot_df:", nrow(plot_df_gene))
table(
  plot_df_gene$sens2,
  plot_df_gene$lineage,
  plot_df_gene$mutationinformation
)

#-----------------------------------------------
# step 3: Assign to plot_df
#-----------------------------------------------
plot_df <- plot_df_gene

#-----------------------------------------------
# step 4: Add p-value
#-----------------------------------------------
# (not implemented; see the commented-out geom_text/geom_label layers
# at the end of the plot definition)

#-----------------------------------------------
# step 5: Plot
#-----------------------------------------------
p_title <- gene
ts <- 8   # base text size for axis/strip text
gls <- 3  # label size for the (currently disabled) p-value labels

# Named colours pin each class to its colour. With an unnamed vector the
# colours are assigned by level order, so a subset containing only "S"
# would be drawn in red.
sens_cols <- c(R = "red", S = "blue")

DSplot <- ggplot(plot_df, aes(x = lineage, fill = sens2)) +
  geom_bar(stat = "count") +
  scale_fill_manual(
    name = "",
    # name = leg_title,
    values = sens_cols
    #, labels = levels(sens2)
  ) +
  facet_wrap(
    ~mutationinformation,
    scales = "free_y",
    #ncol = 3,
    nrow = 2
  ) +
  theme(
    legend.position = "top",
    plot.title = element_text(hjust = 0.5, size = 15, face = "italic"),
    #plot.title = element_blank(),
    strip.text = element_text(size = ts + 2),
    axis.text.x = element_text(size = ts),
    axis.text.y = element_text(size = ts),
    axis.title.y = element_text(size = ts),
    legend.title = element_blank(),
    axis.title.x = element_blank()
  ) +
  labs(
    title = paste0(
      p_title
      #, ": sensitivity by lineage"
    ),
    y = "Sample Count"
  ) #+
  #geom_text(aes(label = pvalRF, x = 2.5, y = ypos_label+0.75))
  # geom_blank(aes(y = ypos_label+1.25)) +
  # geom_label(aes(label = pvalRF, x = 2.5, ypos_label+0.75), fill="white", size =gls)

#========
# Outplot
#========
outdir_lin <- "/home/pub/Work/LSHTM/Thesis_Plots/"

png(
  paste0(outdir_lin, tolower(gene), "_linDS_selected.png"),
  width = 8,
  height = 4,
  units = "in",
  res = 300
)
# Explicit print(): a bare `DSplot` is only auto-printed at the
# interactive top level / under Rscript. When this file is run through
# source() nothing would be drawn and the PNG would be empty.
print(DSplot)
dev.off()