renamed files for lineage_diff_sensitivities.R

This commit is contained in:
Tanushree Tunstall 2022-09-05 13:19:06 +01:00
parent 69a0da0a59
commit 1dacebbaf6
5 changed files with 179 additions and 39 deletions

View file

@ -45,20 +45,10 @@ lef_snps_df = df2[df2$mutationinformation%in%left_snps,]
table(lef_snps_df$lineage) table(lef_snps_df$lineage)
################################## ##################################
# selected lineage plos # selected lineage plots
cols_to_subset = c("mutationinformation" ##################################
, "lineage"
, "dst2"
, "sens2")
#----------------------------------------------- #-----------------------------------------------
# step 0: Subset a smaller df # step 0: Select muts for each target
#-----------------------------------------------
plot_df_gene = df2_lin[, cols_to_subset]
#-----------------------------------------------
# step 1: Select muts for each target
#----------------------------------------------- #-----------------------------------------------
# embb # embb
#sel_mutsP = c("D354N", "Y319D", "Y319D", "A962P", "S651N", "A201S") #sel_mutsP = c("D354N", "Y319D", "Y319D", "A962P", "S651N", "A201S")
@ -69,6 +59,18 @@ sel_mutsP = c("P75R", "A19G", "A133P", "R154W", "R118L") #G30D)
# rpob # rpob
#sel_mutsP = c("") #sel_mutsP = c("")
#----------------------------------------------- #-----------------------------------------------
# step 1: Subset a smaller df
#-----------------------------------------------
# selected lineage plots
cols_to_subset = c("mutationinformation"
, "lineage"
, "dst2"
, "sens2")
plot_df_gene = df2_lin[, cols_to_subset]
#-----------------------------------------------
# step 2: Subset data with just those genes # step 2: Subset data with just those genes
#----------------------------------------------- #-----------------------------------------------
plot_df_gene = plot_df_gene[plot_df_gene$mutationinformation%in%sel_mutsP,] plot_df_gene = plot_df_gene[plot_df_gene$mutationinformation%in%sel_mutsP,]
@ -82,9 +84,9 @@ plot_df = plot_df_gene
#----------------------------------------------- #-----------------------------------------------
# step 4: Add p-value # step 4: Add p-value
# NOT NEEDED, get it from lineage_diff_sensitivities.R if needed
#----------------------------------------------- #-----------------------------------------------
#----------------------------------------------- #-----------------------------------------------
# step 5: Plot # step 5: Plot
#----------------------------------------------- #-----------------------------------------------

View file

@ -0,0 +1,136 @@
#=============
# Data: Input
#=============
# Gene configuration: exactly one config script is sourced per run. Swap the
# commented line to switch target gene.
#source("~/git/LSHTM_analysis/config/embb.R")
source("~/git/LSHTM_analysis/config/gid.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")

# Working copy of merged_df2.
# NOTE(review): merged_df2 is not defined in this script; presumably it is
# created by the scripts sourced above -- confirm.
df2 <- merged_df2
#table(df2$dst2)

# dst2: value of dst, falling back to dst_mode wherever dst is NA.
df2$dst2 <- ifelse(is.na(df2$dst), df2$dst_mode, df2$dst)

# sens2: "R" where dst2 equals 1, "S" for any other non-NA value
# (rows where dst2 is still NA stay NA).
df2$sens2 <- ifelse(df2$dst2 == 1, "R", "S")
table(df2$sens2)
# Distinct mutations and distinct sample ids over the whole dataset.
all_snps <- unique(df2$mutationinformation)
all_snps_n <- length(all_snps)
all_snps_n

all_samples_id <- unique(df2$id) # different to nrows
all_samples_id_n <- length(all_samples_id)
all_samples_id_n # different to nrows

# Restrict to the four lineages of interest and recount.
sel_lineage <- c("L1", "L2", "L3", "L4")
df2_lin <- df2[df2$lineage %in% sel_lineage, ]

sel_lin_snps <- unique(df2_lin$mutationinformation)
sel_lin_snps_n <- length(sel_lin_snps)
sel_lin_snps_n

sel_lin_samples_id <- unique(df2_lin$id)
sel_lin_samples_id_n <- length(sel_lin_samples_id)
sel_lin_samples_id_n

# Mutations that never occur in L1-L4 (question being checked: are these
# unique to L5-L7?).
left_snps <- all_snps[!all_snps %in% sel_lin_snps]
left_snps_n <- length(left_snps)
left_snps_n

# Sanity check: selected-lineage mutations plus the leftover mutations must
# account for every distinct mutation.
if (all_snps_n != sel_lin_snps_n + left_snps_n) {
  stop("Abort: left snps count mismatch")
}
cat("PASS: left snps extracted for gene", tolower(gene))

left_snps

# Rows carrying a leftover mutation, tabulated by the lineage they occur in.
lef_snps_df <- df2[df2$mutationinformation %in% left_snps, ]
table(lef_snps_df$lineage)
##################################
# selected lineage plots
##################################

#-----------------------------------------------
# step 0: Select muts for each target
# Exactly one sel_mutsP assignment should be active; it must correspond to the
# gene config sourced at the top of the script.
#-----------------------------------------------
# embb
#sel_mutsP = c("D354N", "Y319D", "Y319D", "A962P", "S651N", "A201S")

# gid
sel_mutsP <- c("P75R", "A19G", "A133P", "R154W", "R118L") #G30D)

# katg
#sel_mutsP = c("")

# rpob
#sel_mutsP = c("")

#-----------------------------------------------
# step 1: Subset a smaller df
# Keep only the columns needed for plotting, from the L1-L4 data.
#-----------------------------------------------
cols_to_subset <- c("mutationinformation", "lineage", "dst2", "sens2")
plot_df_gene <- df2_lin[, cols_to_subset]

#-----------------------------------------------
# step 2: Subset data with just those genes
# Keep only rows whose mutation is one of the selected mutations.
#-----------------------------------------------
plot_df_gene <- plot_df_gene[plot_df_gene$mutationinformation %in% sel_mutsP, ]

cat("\nnrow of plot_df:", nrow(plot_df_gene))

# Three-way count: sens2 x lineage, one slice per mutation.
table(plot_df_gene$sens2, plot_df_gene$lineage, plot_df_gene$mutationinformation)

#-----------------------------------------------
# step 3: Assign to plot_df
#-----------------------------------------------
plot_df <- plot_df_gene
#-----------------------------------------------
# step 4: Add p-value
# NOT NEEDED, get it from lineage_diff_sensitivities.R if needed
#-----------------------------------------------

#-----------------------------------------------
# step 5: Plot
# Stacked bar chart of sample counts per lineage, filled by sens2, with one
# facet per selected mutation.
#-----------------------------------------------
p_title <- gene # plot title is the gene name
ts <- 8         # base text size for axis text/titles (strip text uses ts + 2)
gls <- 3        # label size; only used by the commented-out p-value label below

DSplot <- ggplot(plot_df, aes(x = lineage, fill = sens2)) +
  geom_bar(stat = "count") +
  # Colours are keyed by name. An unnamed vector is matched by position to the
  # levels present in the data, so a subset containing only "S" would draw "S"
  # in red. Naming the values pins R -> red and S -> blue in every case.
  scale_fill_manual(
    name = ""
    # name = leg_title
    , values = c("R" = "red", "S" = "blue")
  ) +
  # One panel per mutation in a single row; each panel gets its own y range
  # because counts differ a lot between mutations.
  facet_wrap(
    ~mutationinformation
    , scales = "free_y"
    #, ncol = 3
    , nrow = 1
  ) +
  theme(
    legend.position = "top"
    , plot.title = element_text(hjust = 0.5, size = 15, face = "italic")
    #, plot.title = element_blank()
    , strip.text = element_text(size = ts + 2)
    , axis.text.x = element_text(size = ts)
    , axis.text.y = element_text(size = ts)
    , axis.title.y = element_text(size = ts)
    , legend.title = element_blank()
    , axis.title.x = element_blank()
  ) +
  labs(
    title = paste0(p_title
                   #, ": sensitivity by lineage"
    ),
    y = "Sample Count"
  ) #+
# Optional p-value annotation (requires pvalRF / ypos_label columns, see step 4):
#geom_text(aes(label = pvalRF, x = 2.5, y = ypos_label+0.75))
# geom_blank(aes(y = ypos_label+1.25)) +
# geom_label(aes(label = pvalRF, x = 2.5, ypos_label+0.75), fill="white", size =gls)
#========
# Outplot
# Writes DSplot to <outdir_lin><gene>_linDS_selected.png (8 x 3 in, 300 dpi).
#========
outdir_lin <- "/home/pub/Work/LSHTM/Thesis_Plots/"

png(paste0(outdir_lin, tolower(gene), "_linDS_selected.png")
    , width = 8
    , height = 3, units = "in", res = 300)
# Explicit print(): a bare `DSplot` is only rendered through top-level
# auto-printing, which does not happen when this script is run via source().
# Without it the PNG would be written blank.
print(DSplot)
dev.off()

View file

@ -42,6 +42,8 @@ left_snps
lef_snps_df = df2[df2$mutationinformation%in%left_snps,] lef_snps_df = df2[df2$mutationinformation%in%left_snps,]
table(lef_snps_df$lineage) table(lef_snps_df$lineage)
##################################
# selected lineage plots
################################## ##################################
#----------------------------------------------- #-----------------------------------------------
# step 0: Select muts for each target # step 0: Select muts for each target
@ -81,9 +83,9 @@ plot_df = plot_df_gene
#----------------------------------------------- #-----------------------------------------------
# step 4: Add p-value # step 4: Add p-value
# NOT NEEDED, get it from lineage_diff_sensitivities.R if needed
#----------------------------------------------- #-----------------------------------------------
#----------------------------------------------- #-----------------------------------------------
# step 5: Plot # step 5: Plot
#----------------------------------------------- #-----------------------------------------------

View file

@ -1,15 +1,12 @@
#============= #=============
# Data: Input # Data: Input
#============== #==============
#source("~/git/LSHTM_analysis/config/embb.R") source("~/git/LSHTM_analysis/config/embb.R")
source("~/git/LSHTM_analysis/config/gid.R")
source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R") source("~/git/LSHTM_analysis/scripts/plotting/get_plotting_dfs.R")
# Now we need to make a column that fill na in dst with value of dst_mode # Now we need to make a column that fill na in dst with value of dst_mode
df2 = merged_df2 df2 = merged_df2
#table(df2$dst2)
df2$dst2 = ifelse(is.na(df2$dst), df2$dst_mode, df2$dst) df2$dst2 = ifelse(is.na(df2$dst), df2$dst_mode, df2$dst)
df2$sens2 = ifelse(df2$dst2 == 1, "R", "S") df2$sens2 = ifelse(df2$dst2 == 1, "R", "S")
@ -45,39 +42,40 @@ lef_snps_df = df2[df2$mutationinformation%in%left_snps,]
table(lef_snps_df$lineage) table(lef_snps_df$lineage)
################################## ##################################
# selected lineage plots
##################################
#-----------------------------------------------
# step 0: Select muts for each target
#-----------------------------------------------
# embb
#sel_mutsP = c("D354N", "Y319D", "Y319D", "A962P", "S651N", "A201S")
# gid
#sel_mutsP = c("")
# katg
#sel_mutsP = c("")
# rpob
#sel_mutsP = c("")
# selected lineage plos # selected lineage plos
#-----------------------------------------------
# step 1: Subset a smaller df
#-----------------------------------------------
cols_to_subset = c("mutationinformation" cols_to_subset = c("mutationinformation"
, "lineage" , "lineage"
, "dst2" , "dst2"
, "sens2") , "sens2")
#-----------------------------------------------
# step 0: Subset a smaller df
#-----------------------------------------------
plot_df_gene = df2_lin[, cols_to_subset] plot_df_gene = df2_lin[, cols_to_subset]
#-----------------------------------------------
# step 1: Select muts for each target
#-----------------------------------------------
# embb
#sel_mutsP = c("D354N", "Y319D", "Y319D", "A962P", "S651N", "A201S")
# gid
sel_mutsP = c("P75R", "A19G", "A133P", "R154W", "R118L") #G30D)
# katg
#sel_mutsP = c("")
# rpob
#sel_mutsP = c("")
#----------------------------------------------- #-----------------------------------------------
# step 2: Subset data with just those genes # step 2: Subset data with just those genes
#----------------------------------------------- #-----------------------------------------------
plot_df_gene = plot_df_gene[plot_df_gene$mutationinformation%in%sel_mutsP,] plot_df_gene = plot_df_gene[plot_df_gene$mutationinformation%in%sel_mutsP,]
cat("\nnrow of plot_df:", nrow(plot_df_gene)) cat("\nnrow of plot_df:", nrow(plot_df_gene))
table(plot_df_gene$sens2, plot_df_gene$lineage, plot_df_gene$mutationinformation)
#----------------------------------------------- #-----------------------------------------------
# step 3: Assign to plot_df # step 3: Assign to plot_df
#----------------------------------------------- #-----------------------------------------------
plot_df = plot_df_gene plot_df = plot_df_gene
#----------------------------------------------- #-----------------------------------------------
@ -160,7 +158,7 @@ DSplot = ggplot(plot_df, aes(x = lineage,
#======== #========
outdir_lin = "/home/pub/Work/LSHTM/Thesis_Plots/" outdir_lin = "/home/pub/Work/LSHTM/Thesis_Plots/"
png(paste0(outdir_lin, tolower(gene), "_linDS_selected.png") png(paste0(outdir_lin, tolower(gene), "_linDS_selected.png")
, width = 8 , width = 4
, height = 3, units = "in", res = 300 ) , height = 4, units = "in", res = 300 )
DSplot DSplot
dev.off() dev.off()

View file

@ -41,6 +41,8 @@ left_snps
lef_snps_df = df2[df2$mutationinformation%in%left_snps,] lef_snps_df = df2[df2$mutationinformation%in%left_snps,]
table(lef_snps_df$lineage) table(lef_snps_df$lineage)
##################################
# selected lineage plots
################################## ##################################
#----------------------------------------------- #-----------------------------------------------
# step 0: Select muts for each target # step 0: Select muts for each target
@ -81,9 +83,9 @@ plot_df = plot_df_gene
#----------------------------------------------- #-----------------------------------------------
# step 4: Add p-value # step 4: Add p-value
# NOT NEEDED, get it from lineage_diff_sensitivities.R if needed
#----------------------------------------------- #-----------------------------------------------
#----------------------------------------------- #-----------------------------------------------
# step 5: Plot # step 5: Plot
#----------------------------------------------- #-----------------------------------------------