Added foldx_scaled and deepddg_scaled values to combine_df.py, and used that script to merge all the dfs, so that merged_df2 and merged_df3 are in fact what we need for downstream processing

2021-09-10 16:58:36 +01:00 · 2021-09-10 16:58:36 +01:00 · 4ba4ff602e
commit 4ba4ff602e
parent dda5d1ea93
5 changed files with 354 additions and 977 deletions
--- a/scripts/plotting/lineage_data.R
+++ b/scripts/plotting/lineage_data.R
@ -4,21 +4,10 @@
 # WF and LF data with lineage sample, and snp counts
 # sourced by get_plotting_dfs.R
 #########################################################
-# working dir and loading libraries
-# getwd()
-# setwd("~/git/LSHTM_analysis/scripts/plotting")
-# getwd()

-# make cmd
-# globals
-# drug = "streptomycin"
-# gene = "gid"
-
-# source("get_plotting_dfs.R")
-#=======================================================================
-#################################################
+#=================================================
 # Get data with lineage count, and snp diversity
-#################################################
+#=================================================
 table(merged_df2$lineage)

 if (table(merged_df2$lineage == "")[[2]]) {
@ -30,12 +19,12 @@ cat("\nMissing samples with lineage classification:", table(merged_df2$lineage =
 table(merged_df2$lineage_labels)
 class(merged_df2$lineage_labels); nlevels(merged_df2$lineage_labels)

-##################################
+#==========================================
 # WF data: lineages with 
 # snp count
 # total_samples
 # snp diversity (perc)
-##################################
+#==========================================
 sel_lineages = levels(merged_df2$lineage_labels)

 lin_wf = data.frame(sel_lineages) #4, 1
@ -67,9 +56,9 @@ lin_wf
 lin_wf$snp_diversity = lin_wf$num_snps_u/lin_wf$total_samples
 lin_wf

-#=====================
+#----------------------
 # Add some formatting
-#=====================
+#----------------------
 # SNP diversity 
 lin_wf$snp_diversity_f = round( (lin_wf$snp_diversity * 100), digits = 0)
 lin_wf$snp_diversity_f = paste0(lin_wf$snp_diversity_f, "%")
@ -100,12 +89,12 @@ lin_wf$sel_lineages =  factor(lin_wf$sel_lineages, c("L1"

 levels(lin_wf$sel_lineages)

-##################################
+#=================================
 # LF data: lineages with 
 # snp count
 # total_samples
 # snp diversity (perc)
-##################################
+#=================================
 names(lin_wf)
 tot_cols = ncol(lin_wf)
 pivot_cols = c("sel_lineages", "snp_diversity", "snp_diversity_f")
@ -153,3 +142,6 @@ lin_lf$sel_lineages =  factor(lin_lf$sel_lineages, c("L1"
                                                     , ""))

 levels(lin_lf$sel_lineages)
+
+################################################################
+