Added foldx_scaled and deepddg_scaled values to combine_df.py, and also used that script to merge all the dfs so that merged_df2 and merged_df3 are in fact what we need for downstream processing.
This commit is contained in:
parent
dda5d1ea93
commit
4ba4ff602e
5 changed files with 354 additions and 977 deletions
|
@ -4,21 +4,10 @@
|
|||
# WF and LF data with lineage sample, and snp counts
|
||||
# sourced by get_plotting_dfs.R
|
||||
#########################################################
|
||||
# working dir and loading libraries
|
||||
# getwd()
|
||||
# setwd("~/git/LSHTM_analysis/scripts/plotting")
|
||||
# getwd()
|
||||
|
||||
# make cmd
|
||||
# globals
|
||||
# drug = "streptomycin"
|
||||
# gene = "gid"
|
||||
|
||||
# source("get_plotting_dfs.R")
|
||||
#=======================================================================
|
||||
#################################################
|
||||
#=================================================
|
||||
# Get data with lineage count, and snp diversity
|
||||
#################################################
|
||||
#=================================================
|
||||
table(merged_df2$lineage)
|
||||
|
||||
if (table(merged_df2$lineage == "")[[2]]) {
|
||||
|
@ -30,12 +19,12 @@ cat("\nMissing samples with lineage classification:", table(merged_df2$lineage =
|
|||
table(merged_df2$lineage_labels)
|
||||
class(merged_df2$lineage_labels); nlevels(merged_df2$lineage_labels)
|
||||
|
||||
##################################
|
||||
#==========================================
|
||||
# WF data: lineages with
|
||||
# snp count
|
||||
# total_samples
|
||||
# snp diversity (perc)
|
||||
##################################
|
||||
#==========================================
|
||||
sel_lineages = levels(merged_df2$lineage_labels)
|
||||
|
||||
lin_wf = data.frame(sel_lineages) #4, 1
|
||||
|
@ -67,9 +56,9 @@ lin_wf
|
|||
lin_wf$snp_diversity = lin_wf$num_snps_u/lin_wf$total_samples
|
||||
lin_wf
|
||||
|
||||
#=====================
|
||||
#----------------------
|
||||
# Add some formatting
|
||||
#=====================
|
||||
#----------------------
|
||||
# SNP diversity
|
||||
lin_wf$snp_diversity_f = round( (lin_wf$snp_diversity * 100), digits = 0)
|
||||
lin_wf$snp_diversity_f = paste0(lin_wf$snp_diversity_f, "%")
|
||||
|
@ -100,12 +89,12 @@ lin_wf$sel_lineages = factor(lin_wf$sel_lineages, c("L1"
|
|||
|
||||
levels(lin_wf$sel_lineages)
|
||||
|
||||
##################################
|
||||
#=================================
|
||||
# LF data: lineages with
|
||||
# snp count
|
||||
# total_samples
|
||||
# snp diversity (perc)
|
||||
##################################
|
||||
#=================================
|
||||
names(lin_wf)
|
||||
tot_cols = ncol(lin_wf)
|
||||
pivot_cols = c("sel_lineages", "snp_diversity", "snp_diversity_f")
|
||||
|
@ -153,3 +142,6 @@ lin_lf$sel_lineages = factor(lin_lf$sel_lineages, c("L1"
|
|||
, ""))
|
||||
|
||||
levels(lin_lf$sel_lineages)
|
||||
|
||||
################################################################
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue