166 lines
5.2 KiB
R
166 lines
5.2 KiB
R
#=============
|
|
# Data: Input
|
|
#==============
|
|
#source("~/git/LSHTM_analysis/config/embb.R")
|
|
source("~/git/LSHTM_analysis/config/gid.R")
|
|
|
|
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
|
|
|
|
|
|
# Now we need to make a column that fill na in dst with value of dst_mode
|
|
df2 = merged_df2
|
|
#table(df2$dst2)
|
|
|
|
df2$dst2 = ifelse(is.na(df2$dst), df2$dst_mode, df2$dst)
|
|
df2$sens2 = ifelse(df2$dst2 == 1, "R", "S")
|
|
table(df2$sens2)
|
|
|
|
|
|
all_snps = unique(df2$mutationinformation)
|
|
all_snps_n = length(all_snps); all_snps_n
|
|
|
|
all_samples_id = unique(df2$id) # different to nrows
|
|
all_samples_id_n = length(all_samples_id); all_samples_id_n # different to nrows
|
|
|
|
sel_lineage = c("L1", "L2", "L3", "L4")
|
|
df2_lin = df2[df2$lineage%in%sel_lineage,]
|
|
sel_lin_snps = unique(df2_lin$mutationinformation)
|
|
sel_lin_snps_n = length(sel_lin_snps); sel_lin_snps_n
|
|
|
|
sel_lin_samples_id = unique(df2_lin$id)
|
|
sel_lin_samples_id_n = length(sel_lin_samples_id);sel_lin_samples_id_n
|
|
|
|
# are the snps that are not in L1-L4 unique to L5-L7
|
|
left_snps = all_snps[!all_snps%in%sel_lin_snps]
|
|
left_snps_n = length(left_snps); left_snps_n
|
|
|
|
if (all_snps_n == sel_lin_snps_n+left_snps_n){
|
|
cat("PASS: left snps extracted for gene", tolower(gene))
|
|
}else{
|
|
stop("Abort: left snps count mismatch")
|
|
}
|
|
|
|
left_snps
|
|
lef_snps_df = df2[df2$mutationinformation%in%left_snps,]
|
|
table(lef_snps_df$lineage)
|
|
|
|
##################################
|
|
# selected lineage plos
|
|
cols_to_subset = c("mutationinformation"
|
|
, "lineage"
|
|
, "dst2"
|
|
, "sens2")
|
|
|
|
|
|
#-----------------------------------------------
|
|
# step 0: Subset a smaller df
|
|
#-----------------------------------------------
|
|
plot_df_gene = df2_lin[, cols_to_subset]
|
|
|
|
#-----------------------------------------------
|
|
# step 1: Select muts for each target
|
|
#-----------------------------------------------
|
|
# embb
|
|
#sel_mutsP = c("D354N", "Y319D", "Y319D", "A962P", "S651N", "A201S")
|
|
# gid
|
|
sel_mutsP = c("P75R", "A19G", "A133P", "R154W", "R118L") #G30D)
|
|
# katg
|
|
#sel_mutsP = c("")
|
|
# rpob
|
|
#sel_mutsP = c("")
|
|
#-----------------------------------------------
|
|
# step 2: Subset data with just those genes
|
|
#-----------------------------------------------
|
|
plot_df_gene = plot_df_gene[plot_df_gene$mutationinformation%in%sel_mutsP,]
|
|
cat("\nnrow of plot_df:", nrow(plot_df_gene))
|
|
table(plot_df_gene$sens2, plot_df_gene$lineage, plot_df_gene$mutationinformation)
|
|
#-----------------------------------------------
|
|
# step 3: Assign to plot_df
|
|
#-----------------------------------------------
|
|
|
|
plot_df = plot_df_gene
|
|
|
|
#-----------------------------------------------
|
|
# step 4: Add p-value
|
|
#-----------------------------------------------
|
|
plot_df$pval = NULL
|
|
for (i in sel_mutsP) {
|
|
#print (i)
|
|
s_mut = plot_df[plot_df$mutationinformation == i,]
|
|
#print(s_mut)
|
|
s_tab = table(s_mut$lineage, s_mut$sens2)
|
|
#print(s_tab)
|
|
#ft_pvalue_i = round(fisher.test(s_tab)$p.value, 3)
|
|
ft_pvalue_i = fisher.test(s_tab
|
|
#, workspace=2e9
|
|
#, simulate.p.value=TRUE,B=1e7
|
|
)$p.value
|
|
#print(ft_pvalue_i)
|
|
plot_df$pval[plot_df$mutationinformation == i] <- ft_pvalue_i
|
|
#print(s_tab)
|
|
}
|
|
|
|
plot_df$pvalR = round(plot_df$pval, 3)
|
|
|
|
# round P-values
|
|
plot_df$pvalRF = plot_df$pvalR
|
|
plot_df = dplyr::mutate(plot_df
|
|
, pvalRF = case_when(pvalRF == 0.05 ~ "P ."
|
|
, pvalRF <=0.0001 ~ 'P ****'
|
|
, pvalRF <=0.001 ~ 'P ***'
|
|
, pvalRF <=0.01 ~ 'P **'
|
|
, pvalRF <0.05 ~ 'P *'
|
|
, TRUE ~ 'ns'))
|
|
|
|
plot_df
|
|
|
|
head(plot_df)
|
|
table(plot_df$pvalR<0.05)
|
|
|
|
#-----------------------------------------------
|
|
# step 5: Plot
|
|
#-----------------------------------------------
|
|
p_title = gene
|
|
ts = 8
|
|
gls = 3
|
|
DSplot = ggplot(plot_df, aes(x = lineage,
|
|
fill = sens2)) +
|
|
geom_bar(stat = 'count') +
|
|
scale_fill_manual(name = ""
|
|
# name = leg_title
|
|
, values = c("red", "blue")
|
|
#, labels = levels(sens2))
|
|
)+
|
|
facet_wrap(~mutationinformation
|
|
, scales = 'free_y'
|
|
#, ncol = 3
|
|
, nrow = 1
|
|
) +
|
|
theme(legend.position = "top"
|
|
, plot.title = element_text(hjust = 0.5, size=15,face = "italic")
|
|
#, plot.title = element_blank()
|
|
|
|
, strip.text = element_text(size=ts+2)
|
|
, axis.text.x = element_text(size=ts)
|
|
, axis.text.y = element_text(size=ts)
|
|
, axis.title.y = element_text(size=ts)
|
|
, legend.title = element_blank()
|
|
, axis.title.x = element_blank()
|
|
)+
|
|
labs(title = paste0(p_title
|
|
#, ": sensitivity by lineage"
|
|
),
|
|
y = 'Sample Count') #+
|
|
#geom_text(aes(label = pvalRF, x = 2.5, y = ypos_label+0.75))
|
|
# geom_blank(aes(y = ypos_label+1.25)) +
|
|
# geom_label(aes(label = pvalRF, x = 2.5, ypos_label+0.75), fill="white", size =gls)
|
|
|
|
#========
|
|
# Outplot
|
|
#========
|
|
outdir_lin = "/home/pub/Work/LSHTM/Thesis_Plots/"
|
|
png(paste0(outdir_lin, tolower(gene), "_linDS_selected.png")
|
|
, width = 8
|
|
, height = 3, units = "in", res = 300 )
|
|
DSplot
|
|
dev.off()
|