added lineage_dist.R nad renamed lineage_bp_data file to lineage_data
This commit is contained in:
parent
cdab8f0414
commit
66d45a8b73
2 changed files with 224 additions and 0 deletions
69
scripts/functions/lineage_dist.R
Normal file
69
scripts/functions/lineage_dist.R
Normal file
|
@ -0,0 +1,69 @@
|
||||||
|
###############################
|
||||||
|
# TASK: function to plot lineage
|
||||||
|
# dist plots with or without facet
|
||||||
|
# think about color palette
|
||||||
|
# for stability
|
||||||
|
##############################
|
||||||
|
|
||||||
|
#n_colours = length(unique(lin_dist_plot$duet_scaled))
|
||||||
|
#my_palette <- colorRampPalette(c(mcsm_red2, mcsm_red1, mcsm_mid, mcsm_blue1, mcsm_blue2))(n = n_colours+1)
|
||||||
|
|
||||||
|
|
||||||
|
lineage_distP <- function(plotdf
|
||||||
|
, x_axis = "duet_scaled"
|
||||||
|
, y_axis = "lineage_labels"
|
||||||
|
, x_lab = "DUET"
|
||||||
|
, with_facet = F
|
||||||
|
, facet_wrap_var = ""
|
||||||
|
, fill_categ = "mutation_info_labels"
|
||||||
|
, fill_categ_cols = c("#E69F00", "#999999")
|
||||||
|
, my_ats = 15 # axis text size
|
||||||
|
, my_als = 20 # axis label size
|
||||||
|
, my_leg_ts = 16
|
||||||
|
, my_leg_title = 16
|
||||||
|
, my_strip_ts = 20
|
||||||
|
, leg_pos = c(0.8, 0.9)
|
||||||
|
, leg_pos_wf = c("top", "left", "bottom", "right")
|
||||||
|
, leg_dir_wf = c("horizontal", "vertical")
|
||||||
|
, leg_label = "")
|
||||||
|
|
||||||
|
{
|
||||||
|
|
||||||
|
LinDistP = ggplot(plotdf, aes_string(x = x_axis
|
||||||
|
, y = y_axis))+
|
||||||
|
|
||||||
|
geom_density_ridges(aes_string(fill = fill_categ)
|
||||||
|
, scale = 3
|
||||||
|
, size = 0.3
|
||||||
|
, alpha = 0.8) +
|
||||||
|
scale_x_continuous(expand = c(0.01, 0.01)) +
|
||||||
|
#coord_cartesian( xlim = c(-1, 1)) +
|
||||||
|
scale_fill_manual(values = fill_categ_cols) +
|
||||||
|
theme(axis.text.x = element_text(size = my_ats
|
||||||
|
, angle = 90
|
||||||
|
, hjust = 1
|
||||||
|
, vjust = 0.4)
|
||||||
|
, axis.text.y = element_text(size = my_ats)
|
||||||
|
, axis.title.x = element_text(size = my_ats)
|
||||||
|
, axis.title.y = element_blank()
|
||||||
|
, strip.text = element_text(size = my_strip_ts)
|
||||||
|
, legend.text = element_text(size = my_leg_ts)
|
||||||
|
, legend.title = element_text(size = my_leg_title)
|
||||||
|
, legend.position = c(0.8, 0.9)) +
|
||||||
|
labs(x = x_lab
|
||||||
|
, fill = leg_label)
|
||||||
|
|
||||||
|
if (with_facet){
|
||||||
|
|
||||||
|
# used reformulate or make as formula
|
||||||
|
#fwv = reformulate(facet_wrap_var)
|
||||||
|
fwv = as.formula(paste0("~", facet_wrap_var))
|
||||||
|
|
||||||
|
LinDistP = LinDistP +
|
||||||
|
facet_wrap(fwv) +
|
||||||
|
theme(legend.position = leg_pos_wf
|
||||||
|
, legend.direction = leg_dir_wf)
|
||||||
|
}
|
||||||
|
|
||||||
|
return(LinDistP)
|
||||||
|
}
|
155
scripts/plotting/lineage_data.R
Executable file
155
scripts/plotting/lineage_data.R
Executable file
|
@ -0,0 +1,155 @@
|
||||||
|
#!/usr/bin/env Rscript
|
||||||
|
#########################################################
|
||||||
|
# TASK: Script to format data for lineage barplots:
|
||||||
|
# WF and LF data with lineage sample, and snp counts
|
||||||
|
# sourced by get_plotting_dfs.R
|
||||||
|
#########################################################
|
||||||
|
# working dir and loading libraries
|
||||||
|
# getwd()
|
||||||
|
# setwd("~/git/LSHTM_analysis/scripts/plotting")
|
||||||
|
# getwd()
|
||||||
|
|
||||||
|
# make cmd
|
||||||
|
# globals
|
||||||
|
# drug = "streptomycin"
|
||||||
|
# gene = "gid"
|
||||||
|
|
||||||
|
# source("get_plotting_dfs.R")
|
||||||
|
#=======================================================================
|
||||||
|
#################################################
|
||||||
|
# Get data with lineage count, and snp diversity
|
||||||
|
#################################################
|
||||||
|
table(merged_df2$lineage)
|
||||||
|
|
||||||
|
if (table(merged_df2$lineage == "")[[2]]) {
|
||||||
|
|
||||||
|
cat("\nMissing samples with lineage classification:", table(merged_df2$lineage == "")[[2]])
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
table(merged_df2$lineage_labels)
|
||||||
|
class(merged_df2$lineage_labels); nlevels(merged_df2$lineage_labels)
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# WF data: lineages with
|
||||||
|
# snp count
|
||||||
|
# total_samples
|
||||||
|
# snp diversity (perc)
|
||||||
|
##################################
|
||||||
|
sel_lineages = levels(merged_df2$lineage_labels)
|
||||||
|
|
||||||
|
lin_wf = data.frame(sel_lineages) #4, 1
|
||||||
|
total_snps_u = NULL
|
||||||
|
total_samples = NULL
|
||||||
|
|
||||||
|
for (i in sel_lineages){
|
||||||
|
#print(i)
|
||||||
|
curr_total = length(unique(merged_df2$id)[merged_df2$lineage_labels==i])
|
||||||
|
#print(curr_total)
|
||||||
|
total_samples = c(total_samples, curr_total)
|
||||||
|
print(total_samples)
|
||||||
|
|
||||||
|
foo = merged_df2[merged_df2$lineage_labels==i,]
|
||||||
|
print(paste0(i, "=======\n"))
|
||||||
|
print(length(unique(foo$mutationinformation)))
|
||||||
|
curr_count = length(unique(foo$mutationinformation))
|
||||||
|
|
||||||
|
total_snps_u = c(total_snps_u, curr_count)
|
||||||
|
}
|
||||||
|
lin_wf
|
||||||
|
|
||||||
|
# Add these counts as columns to the df
|
||||||
|
lin_wf$num_snps_u = total_snps_u
|
||||||
|
lin_wf$total_samples = total_samples
|
||||||
|
lin_wf
|
||||||
|
|
||||||
|
# Add SNP diversity
|
||||||
|
lin_wf$snp_diversity = lin_wf$num_snps_u/lin_wf$total_samples
|
||||||
|
lin_wf
|
||||||
|
|
||||||
|
#=====================
|
||||||
|
# Add some formatting
|
||||||
|
#=====================
|
||||||
|
# SNP diversity
|
||||||
|
lin_wf$snp_diversity_f = round( (lin_wf$snp_diversity * 100), digits = 0)
|
||||||
|
lin_wf$snp_diversity_f = paste0(lin_wf$snp_diversity_f, "%")
|
||||||
|
|
||||||
|
lin_wf$sel_lineages
|
||||||
|
|
||||||
|
# Important: Check factors so that x-axis categ appear as you want
|
||||||
|
lin_wf$sel_lineages = factor(lin_wf$sel_lineages, c("L1"
|
||||||
|
, "L2"
|
||||||
|
, "L3"
|
||||||
|
, "L4"
|
||||||
|
, "L5"
|
||||||
|
, "L6"
|
||||||
|
, "L7"
|
||||||
|
, "LBOV"
|
||||||
|
, "L1;L2"
|
||||||
|
, "L1;L3"
|
||||||
|
, "L1;L4"
|
||||||
|
, "L2;L3"
|
||||||
|
, "L2;L3;L4"
|
||||||
|
, "L2;L4"
|
||||||
|
, "L2;L6"
|
||||||
|
, "L2;LBOV"
|
||||||
|
, "L3;L4"
|
||||||
|
, "L4;L6"
|
||||||
|
, "L4;L7"
|
||||||
|
, ""))
|
||||||
|
|
||||||
|
levels(lin_wf$sel_lineages)
|
||||||
|
|
||||||
|
##################################
|
||||||
|
# LF data: lineages with
|
||||||
|
# snp count
|
||||||
|
# total_samples
|
||||||
|
# snp diversity (perc)
|
||||||
|
##################################
|
||||||
|
names(lin_wf)
|
||||||
|
tot_cols = ncol(lin_wf)
|
||||||
|
pivot_cols = c("sel_lineages", "snp_diversity", "snp_diversity_f")
|
||||||
|
pivot_cols_n = length(pivot_cols)
|
||||||
|
|
||||||
|
expected_rows = nrow(lin_wf) * ( length(lin_wf) - pivot_cols_n )
|
||||||
|
|
||||||
|
lin_lf <- gather(lin_wf
|
||||||
|
, count_categ
|
||||||
|
, p_count
|
||||||
|
, num_snps_u:total_samples
|
||||||
|
, factor_key = TRUE)
|
||||||
|
lin_lf
|
||||||
|
|
||||||
|
# quick checks
|
||||||
|
if ( nrow(lin_lf ) == expected_rows ){
|
||||||
|
cat("\nPASS: Lineage LF data created"
|
||||||
|
, "\nnrow: ", nrow(lin_lf)
|
||||||
|
, "\nncol: ", ncol(lin_lf))
|
||||||
|
} else {
|
||||||
|
cat("\nFAIL: numbers mismatch"
|
||||||
|
, "\nExpected nrow: ", expected_rows)
|
||||||
|
}
|
||||||
|
|
||||||
|
# Important: Relevel factors so that x-axis categ appear as you want
|
||||||
|
lin_lf$sel_lineages = factor(lin_lf$sel_lineages, c("L1"
|
||||||
|
, "L2"
|
||||||
|
, "L3"
|
||||||
|
, "L4"
|
||||||
|
, "L5"
|
||||||
|
, "L6"
|
||||||
|
, "L7"
|
||||||
|
, "LBOV"
|
||||||
|
, "L1;L2"
|
||||||
|
, "L1;L3"
|
||||||
|
, "L1;L4"
|
||||||
|
, "L2;L3"
|
||||||
|
, "L2;L3;L4"
|
||||||
|
, "L2;L4"
|
||||||
|
, "L2;L6"
|
||||||
|
, "L2;LBOV"
|
||||||
|
, "L3;L4"
|
||||||
|
, "L4;L6"
|
||||||
|
, "L4;L7"
|
||||||
|
, ""))
|
||||||
|
|
||||||
|
levels(lin_lf$sel_lineages)
|
Loading…
Add table
Add a link
Reference in a new issue