added lineage_labels and mutation_info_labels to combining_dfs_plotting
This commit is contained in:
parent
9bb3ac2a01
commit
cdab8f0414
4 changed files with 38 additions and 187 deletions
|
@ -153,6 +153,40 @@ combining_dfs_plotting <- function( my_df_u
|
||||||
quit()
|
quit()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Quick formatting: pretty labels
|
||||||
|
#-----------------------
|
||||||
|
# mutation_info_labels
|
||||||
|
#-----------------------
|
||||||
|
merged_df2$mutation_info_labels = ifelse(merged_df2$mutation_info == dr_muts_col
|
||||||
|
, "DM", "OM")
|
||||||
|
merged_df2$mutation_info_labels = factor(merged_df2$mutation_info_labels)
|
||||||
|
#-----------------------
|
||||||
|
# lineage labels
|
||||||
|
#-----------------------
|
||||||
|
merged_df2$lineage_labels = gsub("lineage", "L", merged_df2$lineage)
|
||||||
|
|
||||||
|
merged_df2$lineage_labels = factor(merged_df2$lineage_labels, c("L1"
|
||||||
|
, "L2"
|
||||||
|
, "L3"
|
||||||
|
, "L4"
|
||||||
|
, "L5"
|
||||||
|
, "L6"
|
||||||
|
, "L7"
|
||||||
|
, "LBOV"
|
||||||
|
, "L1;L2"
|
||||||
|
, "L1;L3"
|
||||||
|
, "L1;L4"
|
||||||
|
, "L2;L3"
|
||||||
|
, "L2;L3;L4"
|
||||||
|
, "L2;L4"
|
||||||
|
, "L2;L6"
|
||||||
|
, "L2;LBOV"
|
||||||
|
, "L3;L4"
|
||||||
|
, "L4;L6"
|
||||||
|
, "L4;L7"
|
||||||
|
, ""))
|
||||||
|
|
||||||
|
|
||||||
#=================================================================
|
#=================================================================
|
||||||
# Merge 2: merged_df3
|
# Merge 2: merged_df3
|
||||||
# dfs with NAs in ORs
|
# dfs with NAs in ORs
|
||||||
|
|
|
@ -16,12 +16,13 @@ cat("cols imported:"
|
||||||
|
|
||||||
|
|
||||||
#############################################################
|
#############################################################
|
||||||
|
# without facet
|
||||||
lineage_distP(lin_dist_plot
|
lineage_distP(lin_dist_plot
|
||||||
, with_facet = F
|
, with_facet = F
|
||||||
, leg_label = "Mutation Class"
|
, leg_label = "Mutation Class"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# with facet
|
||||||
lineage_distP(lin_dist_plot
|
lineage_distP(lin_dist_plot
|
||||||
, with_facet = T
|
, with_facet = T
|
||||||
, facet_wrap_var = "mutation_info_labels"
|
, facet_wrap_var = "mutation_info_labels"
|
||||||
|
|
|
@ -55,9 +55,7 @@ plot_basic_bp_lineage_cl
|
||||||
# Data: All lineages or
|
# Data: All lineages or
|
||||||
# selected few
|
# selected few
|
||||||
#------------------------
|
#------------------------
|
||||||
sel_lineages = levels(lin_lf$sel_lineages)[1:4]
|
lin_lf_plot = lin_lf[lin_lf$sel_lineages%in%c("L1", "L2", "L3", "L4"),]
|
||||||
sel_lineages
|
|
||||||
lin_lf_plot = lin_lf[lin_lf$sel_lineages%in%sel_lineages,]
|
|
||||||
str(lin_lf_plot)
|
str(lin_lf_plot)
|
||||||
|
|
||||||
# drop unused factor levels
|
# drop unused factor levels
|
||||||
|
@ -90,9 +88,7 @@ lin_countP
|
||||||
# Data: All lineages or
|
# Data: All lineages or
|
||||||
# selected few
|
# selected few
|
||||||
#------------------------
|
#------------------------
|
||||||
sel_lineages = levels(lin_wf$sel_lineages)[1:4]
|
lin_wf_plot = lin_wf[lin_wf$sel_lineages%in%c("L1", "L2", "L3", "L4"),]
|
||||||
sel_lineages
|
|
||||||
lin_wf_plot = lin_wf[lin_wf$sel_lineages%in%sel_lineages,]
|
|
||||||
str(lin_wf_plot)
|
str(lin_wf_plot)
|
||||||
|
|
||||||
# drop unused factor levels
|
# drop unused factor levels
|
||||||
|
|
|
@ -1,180 +0,0 @@
|
||||||
#!/usr/bin/env Rscript
|
|
||||||
#########################################################
|
|
||||||
# TASK: Script to format data for lineage barplots:
|
|
||||||
# WF and LF data with lineage sample, and snp counts
|
|
||||||
# sourced by get_plotting_dfs.R
|
|
||||||
#########################################################
|
|
||||||
# working dir and loading libraries
|
|
||||||
# getwd()
|
|
||||||
# setwd("~/git/LSHTM_analysis/scripts/plotting")
|
|
||||||
# getwd()
|
|
||||||
|
|
||||||
# make cmd
|
|
||||||
# globals
|
|
||||||
# drug = "streptomycin"
|
|
||||||
# gene = "gid"
|
|
||||||
|
|
||||||
# source("get_plotting_dfs.R")
|
|
||||||
#=======================================================================
|
|
||||||
#################################################
|
|
||||||
# Get data with lineage count, and snp diversity
|
|
||||||
#################################################
|
|
||||||
table(merged_df2$lineage)
|
|
||||||
|
|
||||||
# any() yields a single TRUE/FALSE even when no lineage is empty; indexing
# table(...)[[2]] fails with "subscript out of bounds" in that case
if (any(merged_df2$lineage == "", na.rm = TRUE)) {
|
|
||||||
|
|
||||||
# sum() counts the empty-lineage rows directly and, unlike table(...)[[2]],
# still works when every lineage value is empty
cat("\nMissing samples with lineage classification:", sum(merged_df2$lineage == "", na.rm = TRUE))
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
# Add pretty lineage labels and mut_info_labels
|
|
||||||
class(merged_df2$lineage); table(merged_df2$lineage)
|
|
||||||
merged_df2$lineage_labels = gsub("lineage", "L", merged_df2$lineage)
|
|
||||||
table(merged_df2$lineage_labels)
|
|
||||||
|
|
||||||
class(merged_df2$lineage_labels)
|
|
||||||
|
|
||||||
merged_df2$lineage_labels = factor(merged_df2$lineage_labels, c("L1"
|
|
||||||
, "L2"
|
|
||||||
, "L3"
|
|
||||||
, "L4"
|
|
||||||
, "L5"
|
|
||||||
, "L6"
|
|
||||||
, "L7"
|
|
||||||
, "LBOV"
|
|
||||||
, "L1;L2"
|
|
||||||
, "L1;L3"
|
|
||||||
, "L1;L4"
|
|
||||||
, "L2;L3"
|
|
||||||
, "L2;L3;L4"
|
|
||||||
, "L2;L4"
|
|
||||||
, "L2;L6"
|
|
||||||
, "L2;LBOV"
|
|
||||||
, "L3;L4"
|
|
||||||
, "L4;L6"
|
|
||||||
, "L4;L7"
|
|
||||||
, ""))
|
|
||||||
|
|
||||||
class(merged_df2$lineage_labels); nlevels(merged_df2$lineage_labels)
|
|
||||||
|
|
||||||
|
|
||||||
##################################
|
|
||||||
# WF data: lineages with
|
|
||||||
# snp count
|
|
||||||
# total_samples
|
|
||||||
# snp diversity (perc)
|
|
||||||
##################################
|
|
||||||
sel_lineages = levels(merged_df2$lineage_labels)
|
|
||||||
|
|
||||||
lin_wf = data.frame(sel_lineages) #4, 1
|
|
||||||
total_snps_u = NULL
|
|
||||||
total_samples = NULL
|
|
||||||
|
|
||||||
for (i in sel_lineages){
|
|
||||||
#print(i)
|
|
||||||
# Count distinct sample ids within this lineage: subset the ids first, then
# de-duplicate. which() drops NA labels so they do not add a spurious NA id.
curr_total = length(unique(merged_df2$id[which(merged_df2$lineage_labels == i)]))
|
|
||||||
#print(curr_total)
|
|
||||||
total_samples = c(total_samples, curr_total)
|
|
||||||
print(total_samples)
|
|
||||||
|
|
||||||
foo = merged_df2[merged_df2$lineage_labels==i,]
|
|
||||||
print(paste0(i, "=======\n"))
|
|
||||||
print(length(unique(foo$mutationinformation)))
|
|
||||||
curr_count = length(unique(foo$mutationinformation))
|
|
||||||
|
|
||||||
total_snps_u = c(total_snps_u, curr_count)
|
|
||||||
}
|
|
||||||
lin_wf
|
|
||||||
|
|
||||||
# Add these counts as columns to the df
|
|
||||||
lin_wf$num_snps_u = total_snps_u
|
|
||||||
lin_wf$total_samples = total_samples
|
|
||||||
|
|
||||||
# Add SNP diversity
|
|
||||||
lin_wf$snp_diversity = lin_wf$num_snps_u/lin_wf$total_samples
|
|
||||||
lin_wf
|
|
||||||
|
|
||||||
#=====================
|
|
||||||
# Add some formatting
|
|
||||||
#=====================
|
|
||||||
# SNP diversity
|
|
||||||
lin_wf$snp_diversity_f = round( (lin_wf$snp_diversity * 100), digits = 0)
|
|
||||||
lin_wf$snp_diversity_f = paste0(lin_wf$snp_diversity_f, "%")
|
|
||||||
|
|
||||||
# Important: Check factors so that x-axis categ appear as you want
|
|
||||||
lin_wf$sel_lineages = factor(lin_wf$sel_lineages, c("L1"
|
|
||||||
, "L2"
|
|
||||||
, "L3"
|
|
||||||
, "L4"
|
|
||||||
, "L5"
|
|
||||||
, "L6"
|
|
||||||
, "L7"
|
|
||||||
, "LBOV"
|
|
||||||
, "L1;L2"
|
|
||||||
, "L1;L3"
|
|
||||||
, "L1;L4"
|
|
||||||
, "L2;L3"
|
|
||||||
, "L2;L3;L4"
|
|
||||||
, "L2;L4"
|
|
||||||
, "L2;L6"
|
|
||||||
, "L2;LBOV"
|
|
||||||
, "L3;L4"
|
|
||||||
, "L4;L6"
|
|
||||||
, "L4;L7"
|
|
||||||
, ""))
|
|
||||||
|
|
||||||
levels(lin_wf$sel_lineages)
|
|
||||||
|
|
||||||
##################################
|
|
||||||
# LF data: lineages with
|
|
||||||
# snp count
|
|
||||||
# total_samples
|
|
||||||
# snp diversity (perc)
|
|
||||||
##################################
|
|
||||||
names(lin_wf)
|
|
||||||
tot_cols = ncol(lin_wf)
|
|
||||||
pivot_cols = c("sel_lineages", "snp_diversity", "snp_diversity_f")
|
|
||||||
pivot_cols_n = length(pivot_cols)
|
|
||||||
|
|
||||||
expected_rows = nrow(lin_wf) * ( length(lin_wf) - pivot_cols_n )
|
|
||||||
|
|
||||||
lin_lf <- gather(lin_wf
|
|
||||||
, count_categ
|
|
||||||
, p_count
|
|
||||||
, num_snps_u:total_samples
|
|
||||||
, factor_key = TRUE)
|
|
||||||
lin_lf
|
|
||||||
|
|
||||||
# quick checks
|
|
||||||
if ( nrow(lin_lf) == expected_rows ){
|
|
||||||
cat("\nPASS: Lineage LF data created"
|
|
||||||
, "\nnrow: ", nrow(lin_lf)
|
|
||||||
, "\nncol: ", ncol(lin_lf))
|
|
||||||
} else {
|
|
||||||
cat("\nFAIL: numbers mismatch"
|
|
||||||
, "\nExpected nrow: ", expected_rows)
|
|
||||||
}
|
|
||||||
|
|
||||||
# Important: Relevel factors so that x-axis categ appear as you want
|
|
||||||
lin_lf$sel_lineages = factor(lin_lf$sel_lineages, c("L1"
|
|
||||||
, "L2"
|
|
||||||
, "L3"
|
|
||||||
, "L4"
|
|
||||||
, "L5"
|
|
||||||
, "L6"
|
|
||||||
, "L7"
|
|
||||||
, "LBOV"
|
|
||||||
, "L1;L2"
|
|
||||||
, "L1;L3"
|
|
||||||
, "L1;L4"
|
|
||||||
, "L2;L3"
|
|
||||||
, "L2;L3;L4"
|
|
||||||
, "L2;L4"
|
|
||||||
, "L2;L6"
|
|
||||||
, "L2;LBOV"
|
|
||||||
, "L3;L4"
|
|
||||||
, "L4;L6"
|
|
||||||
, "L4;L7"
|
|
||||||
, ""))
|
|
||||||
|
|
||||||
levels(lin_lf$sel_lineages)
|
|
Loading…
Add table
Add a link
Reference in a new issue