Added foldx_scaled and deepddg_scaled values to combine_df.py, and used that script to merge all the dfs so that merged_df2 and merged_df3 are in fact what we need for downstream processing

This commit is contained in:
Tanushree Tunstall 2021-09-10 16:58:36 +01:00
parent dda5d1ea93
commit 4ba4ff602e
5 changed files with 354 additions and 977 deletions

View file

@ -4,21 +4,10 @@
# WF and LF data with lineage sample, and snp counts
# sourced by get_plotting_dfs.R
#########################################################
# working dir and loading libraries
# getwd()
# setwd("~/git/LSHTM_analysis/scripts/plotting")
# getwd()
# make cmd
# globals
# drug = "streptomycin"
# gene = "gid"
# source("get_plotting_dfs.R")
#=======================================================================
#################################################
#=================================================
# Get data with lineage count, and snp diversity
#################################################
#=================================================
table(merged_df2$lineage)
if (table(merged_df2$lineage == "")[[2]]) {
@ -30,12 +19,12 @@ cat("\nMissing samples with lineage classification:", table(merged_df2$lineage =
table(merged_df2$lineage_labels)
class(merged_df2$lineage_labels); nlevels(merged_df2$lineage_labels)
##################################
#==========================================
# WF data: lineages with
# snp count
# total_samples
# snp diversity (perc)
##################################
#==========================================
sel_lineages = levels(merged_df2$lineage_labels)
lin_wf = data.frame(sel_lineages) #4, 1
@ -67,9 +56,9 @@ lin_wf
lin_wf$snp_diversity = lin_wf$num_snps_u/lin_wf$total_samples
lin_wf
#=====================
#----------------------
# Add some formatting
#=====================
#----------------------
# SNP diversity
lin_wf$snp_diversity_f = round( (lin_wf$snp_diversity * 100), digits = 0)
lin_wf$snp_diversity_f = paste0(lin_wf$snp_diversity_f, "%")
@ -100,12 +89,12 @@ lin_wf$sel_lineages = factor(lin_wf$sel_lineages, c("L1"
levels(lin_wf$sel_lineages)
##################################
#=================================
# LF data: lineages with
# snp count
# total_samples
# snp diversity (perc)
##################################
#=================================
names(lin_wf)
tot_cols = ncol(lin_wf)
pivot_cols = c("sel_lineages", "snp_diversity", "snp_diversity_f")
@ -153,3 +142,6 @@ lin_lf$sel_lineages = factor(lin_lf$sel_lineages, c("L1"
, ""))
levels(lin_lf$sel_lineages)
################################################################