added function for generating lineage barplots and also test script along wiadding script for processing data and added it to get_plotting_dfs.R
This commit is contained in:
parent
605eb54526
commit
869fca7f94
6 changed files with 470 additions and 5 deletions
|
@ -437,13 +437,19 @@ if (nrow(corr_ps_df3) == nrow(merged_df3) && nrow(merged_df3_comp) == check1) {
|
|||
, "\nGot: ", check1)
|
||||
}
|
||||
|
||||
|
||||
rm(foo)
|
||||
####################################################################
|
||||
# Data for DM OM Plots: Long format dfs
|
||||
####################################################################
|
||||
|
||||
source("other_plots_data.R")
|
||||
|
||||
####################################################################
|
||||
# Data for Lineage barplots: WF and LF dfs
|
||||
####################################################################
|
||||
|
||||
source("lineage_bp_data.R")
|
||||
|
||||
########################################################################
|
||||
# End of script
|
||||
########################################################################
|
||||
|
|
173
scripts/plotting/lineage_bp_data.R
Normal file
173
scripts/plotting/lineage_bp_data.R
Normal file
|
@ -0,0 +1,173 @@
|
|||
#!/usr/bin/env Rscript
|
||||
#########################################################
|
||||
# TASK: Script to format data for lineage barplots:
|
||||
# WF and LF data with lineage sample, and snp counts
|
||||
# sourced by get_plotting_dfs.R
|
||||
#########################################################
|
||||
# working dir and loading libraries
|
||||
# getwd()
|
||||
# setwd("~/git/LSHTM_analysis/scripts/plotting")
|
||||
# getwd()
|
||||
|
||||
# make cmd
|
||||
# globals
|
||||
# drug = "streptomycin"
|
||||
# gene = "gid"
|
||||
|
||||
# source("get_plotting_dfs.R")
|
||||
#=======================================================================
|
||||
#################################################
|
||||
# Get data with lineage count, and snp diversity
|
||||
#################################################
|
||||
table(merged_df2$lineage)
|
||||
|
||||
if (table(merged_df2$lineage == "")[[2]]) {
|
||||
|
||||
cat("\nMissing samples with lineage classification:", table(merged_df2$lineage == "")[[2]])
|
||||
|
||||
}
|
||||
|
||||
##################################
|
||||
# WF data: lineages with
|
||||
# snp count
|
||||
# total_samples
|
||||
# snp diversity (perc)
|
||||
##################################
|
||||
sel_lineages = levels(as.factor(merged_df2$lineage))
|
||||
|
||||
lin_wf = data.frame(sel_lineages) #4, 1
|
||||
total_snps_u = NULL
|
||||
total_samples = NULL
|
||||
|
||||
for (i in sel_lineages){
|
||||
#print(i)
|
||||
curr_total = length(unique(merged_df2$id)[merged_df2$lineage==i])
|
||||
#print(curr_total)
|
||||
total_samples = c(total_samples, curr_total)
|
||||
print(total_samples)
|
||||
|
||||
foo = merged_df2[merged_df2$lineage==i,]
|
||||
print(paste0(i, "=======\n"))
|
||||
print(length(unique(foo$mutationinformation)))
|
||||
curr_count = length(unique(foo$mutationinformation))
|
||||
|
||||
total_snps_u = c(total_snps_u, curr_count)
|
||||
}
|
||||
lin_wf
|
||||
|
||||
# Add these counts as columns to the df
|
||||
lin_wf$num_snps_u = total_snps_u
|
||||
lin_wf$total_samples = total_samples
|
||||
|
||||
# Add SNP diversity
|
||||
lin_wf$snp_diversity = lin_wf$num_snps_u/lin_wf$total_samples
|
||||
lin_wf
|
||||
|
||||
#=====================
|
||||
# Add some formatting
|
||||
#=====================
|
||||
# SNP diversity
|
||||
lin_wf$snp_diversity_f = round( (lin_wf$snp_diversity * 100), digits = 0)
|
||||
lin_wf
|
||||
|
||||
# Lineage names
|
||||
lin_wf$sel_lineages_f = gsub("lineage", "L", lin_wf$sel_lineages)
|
||||
lin_wf
|
||||
|
||||
# # Lineage names
|
||||
# lin_wf = lin_wf %>%
|
||||
# mutate(ordering_category = case_when(
|
||||
# sel_lineages_f == "" ~ 0
|
||||
# , sel_lineages_f == "L1" ~ 1
|
||||
# , sel_lineages_f == "L2" ~ 2
|
||||
# , sel_lineages_f == "L3" ~ 3
|
||||
# , sel_lineages_f == "L4" ~ 4
|
||||
# , sel_lineages_f == "L5" ~ 5
|
||||
# , sel_lineages_f == "L6" ~ 6
|
||||
# , sel_lineages_f == "L7" ~ 7
|
||||
# , sel_lineages_f == "LBOV" ~ 8
|
||||
#
|
||||
# , sel_lineages_f == "L1;L2" ~ 9
|
||||
# , sel_lineages_f == "L1;L3" ~ 10
|
||||
# , sel_lineages_f == "L1;L4" ~ 11
|
||||
#
|
||||
# , sel_lineages_f == "L2;L3" ~ 12
|
||||
# , sel_lineages_f == "L2;L3;L4" ~ 13
|
||||
# , sel_lineages_f == "L2;L4" ~ 14
|
||||
# , sel_lineages_f == "L2;L6" ~ 15
|
||||
# , sel_lineages_f == "L2;LBOV" ~ 16
|
||||
#
|
||||
# , sel_lineages_f == "L3;L4" ~ 17
|
||||
#
|
||||
# , sel_lineages_f == "L4;L6" ~ 18
|
||||
# , sel_lineages_f == "L4;L7" ~ 19
|
||||
#
|
||||
# , FALSE ~ -1)
|
||||
# )
|
||||
|
||||
##################################
|
||||
# LF data: lineages with
|
||||
# snp count
|
||||
# total_samples
|
||||
# snp diversity (perc)
|
||||
##################################
|
||||
names(lin_wf)
|
||||
tot_cols = ncol(lin_wf)
|
||||
pivot_cols = c("sel_lineages", "sel_lineages_f", "snp_diversity", "snp_diversity_f")
|
||||
pivot_cols_n = length(pivot_cols)
|
||||
|
||||
expected_rows = nrow(lin_wf) * ( length(lin_wf) - pivot_cols_n )
|
||||
|
||||
lin_lf <- gather(lin_wf
|
||||
, count_categ
|
||||
, p_count
|
||||
, num_snps_u:total_samples
|
||||
, factor_key = TRUE)
|
||||
lin_lf
|
||||
|
||||
# quick checks
|
||||
if ( nrow(lin_lf) == expected_rows ){
|
||||
cat("\nPASS: Lineage LF data created"
|
||||
, "\nnrow: ", nrow(lin_lf)
|
||||
, "\nncol: ", ncol(lin_lf))
|
||||
} else {
|
||||
cat("\nFAIL: numbers mismatch"
|
||||
, "\nExpected nrow: ", expected_rows)
|
||||
}
|
||||
#######################################
|
||||
# #=====================
|
||||
# # Add some formatting
|
||||
# #=====================
|
||||
# lin_lf$sel_lineages_f = gsub("lineage", "L", lin_lf$sel_lineages)
|
||||
# lin_lf
|
||||
|
||||
|
||||
# lin_lf = lin_lf %>%
|
||||
# mutate(ordering_category = case_when(
|
||||
# sel_lineages_f == "" ~ 0
|
||||
# , sel_lineages_f == "L1" ~ 1
|
||||
# , sel_lineages_f == "L2" ~ 2
|
||||
# , sel_lineages_f == "L3" ~ 3
|
||||
# , sel_lineages_f == "L4" ~ 4
|
||||
# , sel_lineages_f == "L5" ~ 5
|
||||
# , sel_lineages_f == "L6" ~ 6
|
||||
# , sel_lineages_f == "L7" ~ 7
|
||||
# , sel_lineages_f == "LBOV" ~ 8
|
||||
#
|
||||
# , sel_lineages_f == "L1;L2" ~ 9
|
||||
# , sel_lineages_f == "L1;L3" ~ 10
|
||||
# , sel_lineages_f == "L1;L4" ~ 11
|
||||
#
|
||||
# , sel_lineages_f == "L2;L3" ~ 12
|
||||
# , sel_lineages_f == "L2;L3;L4" ~ 13
|
||||
# , sel_lineages_f == "L2;L4" ~ 14
|
||||
# , sel_lineages_f == "L2;L6" ~ 15
|
||||
# , sel_lineages_f == "L2;LBOV" ~ 16
|
||||
#
|
||||
# , sel_lineages_f == "L3;L4" ~ 17
|
||||
#
|
||||
# , sel_lineages_f == "L4;L6" ~ 18
|
||||
# , sel_lineages_f == "L4;L7" ~ 19
|
||||
#
|
||||
# , FALSE ~ -1)
|
||||
# )
|
|
@ -1,7 +1,8 @@
|
|||
#!/usr/bin/env Rscript
|
||||
#########################################################
|
||||
# TASK: producing boxplots for dr and other muts
|
||||
|
||||
# TASK: Script to format data for dm om plots:
|
||||
# generating LF data
|
||||
# sourced by get_plotting_dfs.R
|
||||
#########################################################
|
||||
# working dir and loading libraries
|
||||
# getwd()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue