LSHTM_analysis/scripts/plotting/plotting_thesis/gid/lineage_diff_sensitivities.R

134 lines
4.1 KiB
R

#=============
# Data: Input
#==============
#source("~/git/LSHTM_analysis/config/embb.R")
source("~/git/LSHTM_analysis/config/gid.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# Now we need to make a column that fill na in dst with value of dst_mode
df2 = merged_df2
#table(df2$dst2)
df2$dst2 = ifelse(is.na(df2$dst), df2$dst_mode, df2$dst)
df2$sens2 = ifelse(df2$dst2 == 1, "R", "S")
table(df2$sens2)
all_snps = unique(df2$mutationinformation)
all_snps_n = length(all_snps); all_snps_n
all_samples_id = unique(df2$id) # different to nrows
all_samples_id_n = length(all_samples_id); all_samples_id_n # different to nrows
sel_lineage = c("L1", "L2", "L3", "L4")
df2_lin = df2[df2$lineage%in%sel_lineage,]
sel_lin_snps = unique(df2_lin$mutationinformation)
sel_lin_snps_n = length(sel_lin_snps); sel_lin_snps_n
sel_lin_samples_id = unique(df2_lin$id)
sel_lin_samples_id_n = length(sel_lin_samples_id);sel_lin_samples_id_n
# are the snps that are not in L1-L4 unique to L5-L7
left_snps = all_snps[!all_snps%in%sel_lin_snps]
left_snps_n = length(left_snps); left_snps_n
if (all_snps_n == sel_lin_snps_n+left_snps_n){
cat("PASS: left snps extracted for gene", tolower(gene))
}else{
stop("Abort: left snps count mismatch")
}
left_snps
lef_snps_df = df2[df2$mutationinformation%in%left_snps,]
table(lef_snps_df$lineage)
##################################
# selected lineage plos
cols_to_subset = c("mutationinformation"
, "lineage"
, "dst2"
, "sens2")
#-----------------------------------------------
# step 0: Subset a smaller df
#-----------------------------------------------
plot_df_gene = df2_lin[, cols_to_subset]
#-----------------------------------------------
# step 1: Select muts for each target
#-----------------------------------------------
# embb
#sel_mutsP = c("D354N", "Y319D", "Y319D", "A962P", "S651N", "A201S")
# gid
sel_mutsP = c("P75R", "A19G", "A133P", "R154W", "R118L") #G30D)
# katg
#sel_mutsP = c("")
# rpob
#sel_mutsP = c("")
#-----------------------------------------------
# step 2: Subset data with just those genes
#-----------------------------------------------
plot_df_gene = plot_df_gene[plot_df_gene$mutationinformation%in%sel_mutsP,]
cat("\nnrow of plot_df:", nrow(plot_df_gene))
table(plot_df_gene$sens2, plot_df_gene$lineage, plot_df_gene$mutationinformation)
#-----------------------------------------------
# step 3: Assign to plot_df
#-----------------------------------------------
plot_df = plot_df_gene
#-----------------------------------------------
# step 4: Add p-value
#-----------------------------------------------
#-----------------------------------------------
# step 5: Plot
#-----------------------------------------------
p_title = gene
ts = 8
gls = 3
DSplot = ggplot(plot_df, aes(x = lineage,
fill = sens2)) +
geom_bar(stat = 'count') +
scale_fill_manual(name = ""
# name = leg_title
, values = c("red", "blue")
#, labels = levels(sens2))
)+
facet_wrap(~mutationinformation
, scales = 'free_y'
#, ncol = 3
, nrow = 1
) +
theme(legend.position = "top"
, plot.title = element_text(hjust = 0.5, size=15,face = "italic")
#, plot.title = element_blank()
, strip.text = element_text(size=ts+2)
, axis.text.x = element_text(size=ts)
, axis.text.y = element_text(size=ts)
, axis.title.y = element_text(size=ts)
, legend.title = element_blank()
, axis.title.x = element_blank()
)+
labs(title = paste0(p_title
#, ": sensitivity by lineage"
),
y = 'Sample Count') #+
#geom_text(aes(label = pvalRF, x = 2.5, y = ypos_label+0.75))
# geom_blank(aes(y = ypos_label+1.25)) +
# geom_label(aes(label = pvalRF, x = 2.5, ypos_label+0.75), fill="white", size =gls)
#========
# Outplot
#========
outdir_lin = "/home/pub/Work/LSHTM/Thesis_Plots/"
png(paste0(outdir_lin, tolower(gene), "_linDS_selected.png")
, width = 8
, height = 3, units = "in", res = 300 )
DSplot
dev.off()