moved all test scripts for functions to tests/

2021-09-09 13:12:07 +01:00 · 2021-09-09 13:12:07 +01:00 · 03031d2eb6
commit 03031d2eb6
parent 2ee66c770b
15 changed files with 162 additions and 776 deletions
--- a/scripts/functions/tests/test_aa_prop_bp.R
+++ b/scripts/functions/tests/test_aa_prop_bp.R
@@ -0,0 +1,63 @@
+#!/usr/bin/env Rscript             
+library(ggplot2)
+library(tidyverse)
+library(data.table)
+
+setwd("~/git/LSHTM_analysis/scripts/functions/") # relative source() paths below resolve against this directory
+getwd()
+#############################################################
+#===========================================
+# load functions, data, dirs, hardcoded vars
+# that will be used in testing the functions
+#===========================================
+source("plotting_data.R")
+infile = "/home/tanu/git/Data/streptomycin/output/"
+pd_df = plotting_data(infile) # NOTE(review): other test scripts in this commit pass a data frame (read.csv output), not a directory path - confirm
+my_df       = pd_df[[1]]
+my_df_u     = pd_df[[2]]
+my_df_u_lig = pd_df[[3]]
+dup_muts    = pd_df[[4]]
+
+source("../plotting_globals.R") # NOTE(review): other test scripts source "plotting_globals.R" from this same working directory - confirm path
+drug = "streptomycin"
+gene = "gid"
+
+import_dirs(drug, gene) # defined outside this script; presumably sets directory globals for drug/gene - TODO confirm
+
+#=====================
+# functions to test
+#=====================
+source("stability_count_bp.R")
+source("position_count_bp.R") # NOTE(review): confirm one of the sourced files defines aa_prop_bp(), which is called at the end of this script
+#################################################################
+##############################################
+# read a sample file containing muts and prop
+###############################################
+df<- read.csv(file.choose()) # file.choose() opens an interactive picker, so the sample CSV is selected by hand
+
+setDT(df)[, pos_count := .N, by = .(position)] # make df a data.table and add pos_count = number of rows sharing each position
+foo = data.frame(df$position, df$pos_count) # (position, pos_count) pairs; foo is not referenced again in this script
+
+#snpsBYpos_df <- df %>%
+#  group_by(position) %>%
+#  summarize(snpsBYpos = mean(pos_count))
+
+# subset df without duplicates for position (first row per position is kept); df2 is not referenced again in this script
+df2 = df[!duplicated(df$position)]
+##################################################################
+# ---------------------------------------
+# barplot for nssnps, coloured by aa prop
+# ---------------------------------------
+pos_colname = "position"             # passed to aa_prop_bp() as position_colname
+aa_prop_colname = "mut_prop_water"   # passed to aa_prop_bp() as fill_colname
+aa_prop_colours = c("black", "blue") # passed to aa_prop_bp() as fill_colours
+my_legname = "aa_prop: water"        # passed to aa_prop_bp() as leg_name
+
+# call function; every argument is taken from the variables defined just above
+aa_prop_bp(plotdf = df
+           , position_colname = pos_colname
+           , fill_colname = aa_prop_colname
+           , fill_colours = aa_prop_colours # must name the colour vector assigned above
+           , leg_name = my_legname)
+           
+#===============================================================
--- a/scripts/functions/tests/test_af_or_calcs.R
+++ b/scripts/functions/tests/test_af_or_calcs.R
@@ -0,0 +1,59 @@
+#!/usr/bin/env Rscript                                                  
+#########################################################
+# TASK: To calculate Allele Frequency and
+# Odds Ratio from master data
+#########################################################
+# load libraries
+#source("Header_TT.R")
+require("getopt", quietly = TRUE) # cmd parse arguments; NOTE(review): no getopt call is visible in this script
+
+# set the working directory (printed before and after) so the relative source() calls below resolve
+getwd()
+setwd("~/git/LSHTM_analysis/scripts/functions/")
+getwd()
+
+# load functions
+source("plotting_globals.R")
+source("mychisq_or.R")
+source("myaf_or_calcs.R")
+
+# drug and gene are hard-coded for this test run; no command-line options are parsed
+drug = "streptomycin"
+gene = "gid"
+
+# call import_dirs(); datadir and outdir used below are not assigned in this script, presumably set here - TODO confirm
+import_dirs(drug, gene)
+
+# input file 1: master data
+#in_filename_master  = 'original_tanushree_data_v2.csv' #19K
+in_filename_master  = 'mtb_gwas_meta_v6.csv' #35k
+infile_master = paste0(datadir, in_filename_master) # NOTE(review): no "/" is inserted here, unlike the outdir paths below - confirm datadir ends with "/"
+cat(paste0('Reading infile1: raw data', ' ', infile_master) )
+
+# input file 2: gene associated meta data file to extract valid snps and add calcs to.
+# This is outfile_metadata from data_extraction.py
+in_filename_metadata =  paste0(tolower(gene), '_metadata.csv')
+infile_metadata = paste0(outdir, '/', in_filename_metadata)
+cat(paste0('Reading input file 2 i.e gene associated metadata:', infile_metadata))
+
+# out_filename_af_or = paste0(tolower(gene), '_meta_data_with_AF_OR.csv')
+out_filename_af_or = paste0(tolower(gene), '_af_or.csv')
+outfile_af_or = paste0(outdir, '/', out_filename_af_or)
+cat(paste0('Output file with full path:', outfile_af_or))
+
+cat("master data:", infile_master)
+cat("gene data:", infile_metadata)
+
+dr_muts_col # comes from global (dr_mutations_<drug>)
+other_muts_col # comes from global (other_mutations_<drug>)
+# run the calculation; my_afor() is not defined in this script, presumably in myaf_or_calcs.R - TODO confirm
+my_afor (  infile_master
+         , infile_metadata
+         , outfile = outfile_af_or
+         #, outfile = "FOO_TEST.csv"
+         , drug
+         , gene
+         , idcol = "id"
+         , dr_muts_col 
+         , other_muts_col
+         )
--- a/scripts/functions/tests/test_bp.R
+++ b/scripts/functions/tests/test_bp.R
@@ -0,0 +1,113 @@
+#!/usr/bin/env Rscript             
+setwd("~/git/LSHTM_analysis/scripts/functions/") # relative source() paths below resolve against this directory
+getwd()
+#############################################################
+#===========================================
+# load functions, data, dirs, hardcoded vars
+# that will be used in testing the functions
+#===========================================
+drug = "streptomycin"
+gene = "gid"
+
+source("plotting_data.R")
+
+infile = paste0("~/git/Data/", drug, "/output/", gene, "_comb_stab_struc_params.csv")
+infile_df = read.csv(infile)
+
+lig_dist = 5 # ligand-distance cutoff handed to plotting_data() below
+pd_df = plotting_data(infile_df
+                      , lig_dist_colname = 'ligand_distance' # NOTE(review): other callers in this commit spell this ligand_dist_colname - confirm
+                      , lig_dist_cutoff = lig_dist)
+
+my_df       = pd_df[[1]]
+my_df_u     = pd_df[[2]]
+my_df_u_lig = pd_df[[3]]
+dup_muts    = pd_df[[4]]
+
+#=====================
+# functions to test
+#=====================
+source("stability_count_bp.R")
+source("position_count_bp.R")
+
+##################################################################
+# ------------------------------
+# barplot for DUET stability (duet_outcome column)
+# ------------------------------
+basic_bp_duet =  paste0(tolower(gene), "_basic_barplot_PS.svg")
+plot_basic_bp_duet  =  paste0(plotdir,"/", basic_bp_duet) # NOTE(review): plotdir is not assigned in this script and import_dirs() is not called here - confirm where it is set
+
+svg(plot_basic_bp_duet) # open the SVG device; the matching dev.off() below closes it
+print(paste0("plot filename:", basic_bp_duet))
+
+# function only
+stability_count_bp(plotdf = my_df_u
+               , df_colname = "duet_outcome"
+               , leg_title = "DUET outcome"
+               , label_categories = c("Destabilising", "Stabilising")
+               , leg_position = "top")
+
+dev.off()
+
+# ------------------------------
+# barplot for ligand affinity
+# ------------------------------
+basic_bp_ligand =  paste0(tolower(gene), "_basic_barplot_LIG.svg")
+plot_basic_bp_ligand  =  paste0(plotdir, "/", basic_bp_ligand)
+
+svg(plot_basic_bp_ligand) # open the SVG device; the matching dev.off() below closes it
+print(paste0("plot filename:", basic_bp_ligand))
+
+# function only
+# lig_dist is deliberately not reassigned here: my_df_u_lig was built with the lig_dist set above, so the axis label must quote that same cutoff
+stability_count_bp(plotdf = my_df_u_lig
+               , df_colname = "ligand_outcome"
+               , leg_title = "Ligand outcome"
+               , yaxis_title = paste0("Number of nsSNPs\nLigand dist: <", lig_dist, "\u212b")
+               #, bp_plot_title = paste0("Sites < ", lig_dist, " Ang of ligand")
+               )
+
+dev.off()
+# ------------------------------
+# barplot for foldX: stability_count_bp() on the foldx_outcome column of my_df_u
+# ------------------------------
+basic_bp_foldx = paste0(tolower(gene), "_basic_barplot_foldx.svg")
+plot_basic_bp_foldx  =  paste0(plotdir,"/", basic_bp_foldx)
+
+svg(plot_basic_bp_foldx) # open the SVG device; the matching dev.off() below closes it
+print(paste0("plot filename:", plot_basic_bp_foldx))
+
+stability_count_bp(plotdf = my_df_u
+                   , df_colname = "foldx_outcome"
+                   , leg_title = "FoldX outcome")
+dev.off()
+#===============================================================
+# ------------------------------
+# barplot for nssnp site count: all (my_df_u)
+# ------------------------------
+pos_count_duet =  paste0(tolower(gene), "_position_count_PS.svg")
+plot_pos_count_duet = paste0(plotdir, "/", pos_count_duet)
+
+svg(plot_pos_count_duet)
+print(paste0("plot filename:", plot_pos_count_duet))
+
+# function only; site_snp_count_bp() is not defined in this script, presumably in position_count_bp.R - TODO confirm
+site_snp_count_bp(plotdf = my_df_u
+                  , df_colname = "position")
+
+dev.off()
+# ------------------------------
+# barplot for nssnp site count: my_df_u_lig, presumably sites within the lig_dist cutoff given to plotting_data() above (5, not 10) - TODO confirm
+# ------------------------------
+pos_count_ligand =  paste0(tolower(gene), "_position_count_LIG.svg")
+plot_pos_count_ligand = paste0(plotdir, "/", pos_count_ligand)
+
+svg(plot_pos_count_ligand)
+print(paste0("plot filename:", plot_pos_count_ligand))
+
+# function only
+site_snp_count_bp(plotdf = my_df_u_lig
+                  , df_colname = "position")
+
+dev.off()
+#===============================================================
--- a/scripts/functions/tests/test_bp_lineage.R
+++ b/scripts/functions/tests/test_bp_lineage.R
@@ -0,0 +1,62 @@
+setwd("~/git/LSHTM_analysis/scripts/plotting") # relative source() paths below resolve against this directory
+
+source ('get_plotting_dfs.R') # lin_lf and lin_wf used below are not created in this script, presumably here - TODO confirm
+source("../functions/bp_lineage.R") # presumably defines lin_count_bp() - TODO confirm
+
+#########################################
+# Lineage and SNP count: lineage lf data
+#########################################
+#=========================
+# Data: all lineages (every level of
+# sel_lineages_f is selected below)
+#=========================
+sel_lineages = levels(lin_lf$sel_lineages_f)
+sel_lineages
+lin_lf_plot = lin_lf[lin_lf$sel_lineages_f%in%sel_lineages,]
+
+# drop unused factor levels (re-creating the factor keeps only the levels present)
+lin_lf_plot$sel_lineages_f = factor(lin_lf_plot$sel_lineages_f)
+levels(lin_lf_plot$sel_lineages_f)
+#=========================
+# Lineage count plot: p_count per lineage, bars filled by count_categ
+#=========================
+lin_count_bp(lin_lf_plot
+             , x_categ = "sel_lineages_f"
+             , y_count = "p_count"
+             , bar_fill_categ = "count_categ"
+             , display_label_col = "p_count"
+             , bar_stat_stype = "identity"
+             , x_lab_angle = 90
+             , my_xats = 20
+             , bar_col_labels = c("Mutations", "Total Samples")
+             , bar_col_values = c("grey50", "gray75")
+             , y_scale_percent = F # T for diversity
+             , y_log10 = F
+             , y_label = "Count")
+
+###############################################
+# Lineage SNP diversity count: lineage wf data
+###############################################
+#=========================
+# Data: all lineages (every level of
+# sel_lineages_f is selected below)
+#=========================
+sel_lineages = levels(lin_wf$sel_lineages_f)
+sel_lineages
+lin_wf_plot = lin_wf[lin_wf$sel_lineages_f%in%sel_lineages,]
+
+# drop unused factor levels (re-creating the factor keeps only the levels present)
+lin_wf_plot$sel_lineages_f = factor(lin_wf_plot$sel_lineages_f)
+levels(lin_wf_plot$sel_lineages_f)
+#=========================
+# Lineage Diversity plot: snp_diversity per lineage, labelled from snp_diversity_f
+#=========================
+lin_count_bp(lin_wf_plot
+                 , x_categ = "sel_lineages_f"
+                 , y_count = "snp_diversity"
+                 , display_label_col = "snp_diversity_f"
+                 , bar_stat_stype = "identity"
+                 , x_lab_angle = 90
+                 , my_xats = 20
+                 , y_scale_percent = T
+                 , y_label = "SNP diversity")
--- a/scripts/functions/tests/test_combining_dfs_plotting.R
+++ b/scripts/functions/tests/test_combining_dfs_plotting.R
@@ -0,0 +1,100 @@
+#!/usr/bin/env Rscript
+
+# set the working directory (printed before and after) so the relative source() calls below resolve
+getwd()
+setwd("~/git/LSHTM_analysis/scripts/functions/")
+getwd()
+
+# infile_params = paste0(outdir, "/" , tolower(gene), "_comb_afor.csv")
+# infile_metadata = paste0(outdir, "/", tolower(gene), "_metadata")
+# NOTE: the commented-out code in this section is an alternative invocation that passes
+# file paths to combining_dfs_plotting(); it is kept for reference and is not run
+# source("combining_dfs_plotting_func.R")
+# 
+####################################################################
+# in_file_params = "~/git/Data/streptomycin/output/gid_comb_afor.csv"
+# in_file_metadata = "~/git/Data/streptomycin/output/gid_metadata.csv"
+# 
+# all_plot_dfs = combining_dfs_plotting(df1_mcsm_comb = infile_params
+#                        , df2_gene_metadata = infile_metadata
+#                        , lig_dist_colname = 'ligand_distance'
+#                        , lig_dist_cutoff = 10)
+# 
+# merged_df2        = all_plot_dfs[[1]]
+# merged_df3        = all_plot_dfs[[2]]
+# merged_df2_comp   = all_plot_dfs[[3]]
+# merged_df3_comp   = all_plot_dfs[[4]]
+# merged_df2_lig    = all_plot_dfs[[5]]
+# merged_df3_lig    = all_plot_dfs[[6]]
+# 
+# bar_colnames = data.frame(colnames(merged_df2))
+###########################################################
+source("plotting_globals.R")
+source("plotting_data.R")
+source("combining_dfs_plotting.R")
+
+#---------------------
+# call: import_dirs(); outdir used below is not assigned in this script, presumably set here - TODO confirm
+#---------------------
+gene = 'gid'
+drug = 'streptomycin'
+
+import_dirs(drug_name = drug, gene_name = gene)
+
+
+#============================
+# Input 1: plotting_data()
+#============================
+if (!exists("infile_params") && exists("gene")){ # only build a default path when infile_params is not already defined
+#if (!is.character(infile_params) && exists("gene")){
+  #in_filename_params = paste0(tolower(gene), "_all_params.csv") 
+  in_filename_params = paste0(tolower(gene), "_comb_afor.csv") # part combined for gid
+  infile_params = paste0(outdir, "/", in_filename_params)
+  cat("\nInput file for mcsm comb data not specified, assuming filename: ", infile_params, "\n")
+}
+
+mcsm_comb_data = read.csv(infile_params, header = T)
+
+#-------------------------------
+# call function: plotting_data(); element 2 of its result (my_df_u) is the first input to combining_dfs_plotting() later in this script
+#-------------------------------
+pd_df = plotting_data(df = mcsm_comb_data
+                      , ligand_dist_colname = 'ligand_distance' # NOTE(review): test_bp.R in this commit spells this lig_dist_colname - confirm against plotting_data()
+                      , lig_dist_cutoff = 10) # closing parenthesis ends the call so the next assignment parses
+my_df_u = pd_df[[2]] 
+
+#======================================
+# Input 2: read <gene>_metadata.csv
+#======================================
+if (!exists("infile_metadata") && exists("gene")){ # only build a default path when infile_metadata is not already defined
+#if (!is.character(infile_metadata) && exists("gene")){
+  in_filename_metadata = paste0(tolower(gene), "_metadata.csv") # default file name: <gene>_metadata.csv
+  infile_metadata = paste0(outdir, "/", in_filename_metadata)
+  cat("\nInput file for gene metadata not specified, assuming filename: ", infile_metadata, "\n")
+}
+
+cat("\nReading meta data file:", infile_metadata)
+
+gene_metadata <- read.csv(infile_metadata
+                          , stringsAsFactors = F
+                          , header = T)
+                          
+#-----------------------------------------
+# test function: combining_dfs_plotting(); inputs are my_df_u and gene_metadata, result is a list unpacked below
+#-----------------------------------------
+all_plot_dfs = combining_dfs_plotting(my_df_u
+                       , gene_metadata
+                       , lig_dist_colname = 'ligand_distance'
+                       , lig_dist_cutoff = 10)
+
+merged_df2          = all_plot_dfs[[1]]
+merged_df3          = all_plot_dfs[[2]]
+merged_df2_comp     = all_plot_dfs[[3]]
+merged_df3_comp     = all_plot_dfs[[4]]
+merged_df2_lig      = all_plot_dfs[[5]]
+merged_df3_lig      = all_plot_dfs[[6]]
+merged_df2_comp_lig = all_plot_dfs[[7]]
+merged_df3_comp_lig = all_plot_dfs[[8]]
+########################################################################
+#                           End of script
+########################################################################
--- a/scripts/functions/tests/test_lf_bp.R
+++ b/scripts/functions/tests/test_lf_bp.R
@@ -0,0 +1,58 @@
+setwd("~/git/LSHTM_analysis/scripts/plotting/") # relative source() paths below resolve against this directory
+source("Header_TT.R")
+source("../functions/lf_bp.R") # presumably defines lf_bp() - TODO confirm
+# ================================================
+# Data: run get_plotting_dfs.R
+# to get the long format data to test this function
+# drug = "streptomycin"
+# gene = "gid"
+# source("get_plotting_dfs.R")
+# ==================================================
+
+######################
+# Make plot: ggplot
+######################
+lf_bp(lf_df = lf_encomddg # not created in this script; see the Data note above
+      , p_title = "ENCoM-DDG"
+      , colour_categ = "ddg_encom_outcome"
+      , x_grp = "mutation_info"
+      , y_var = "param_value"
+      , facet_var = "param_type"
+      , n_facet_row = 1
+      , y_scales = "free_y"
+      , colour_bp_strip = "khaki2"
+      , dot_size = 3
+      , dot_transparency = 0.3
+      , violin_quantiles = c(0.25, 0.5, 0.75)
+      , my_ats = 22 # axis text size
+      , my_als = 20 # axis label size
+      , my_fls = 20 # facet label size
+      , my_pts = 22 # plot title size
+      , make_boxplot = F
+      , bp_width = "auto"
+      , add_stats = T
+      , stat_grp_comp = c("DM", "OM")
+      , stat_method = "wilcox.test"
+      , my_paired = FALSE
+      , stat_label = c("p.format", "p.signif") )
+
+#wilcox.test(wf_encomdds$`EnCOM ΔΔS`[wf_encomdds$mutation_info == "DM"]
+#            , wf_encomdds$`EnCOM ΔΔS`[wf_encomdds$mutation_info == "OM"])
+
+######################
+# Make plot: plotly
+######################
+# FIXME: These labels are not working as I want!
+# lf_bp_plotly(lf_df = lf_deepddg
+#       , p_title = "DeepDDG"
+#       , colour_categ = "deepddg_outcome"
+#       , x_grp = "mutation_info"
+#       , y_var = "param_value"
+#       , facet_var = "param_type"
+#       , n_facet_row = 1
+#       , y_scales = "free_y"
+#       , colour_bp_strip = "khaki2"
+#       , dot_size = 3
+#       , dot_transparency = 0.3
+#       , violin_quantiles = c(0.25, 0.5, 0.75)
+#  )
--- a/scripts/functions/tests/test_lf_unpaired_stats.R
+++ b/scripts/functions/tests/test_lf_unpaired_stats.R
@@ -0,0 +1,19 @@
+setwd("~/git/LSHTM_analysis/scripts/functions") # relative source() path below resolves against this directory
+source("lf_unpaired_stats.R") # presumably defines lf_unpaired_stats() - TODO confirm
+
+#####################
+# call stat function()
+# a useful way to check stats
+# for any lf data
+#####################
+# Note: Data - lf_duet is not created in this script;
+# run other_plots_data.R first
+# to get the long format data to test this function
+
+stat_results_df <- lf_unpaired_stats(lf_data =  lf_duet
+                  , lf_stat_value = "param_value"
+                  , lf_stat_group = "mutation_info"
+                  , lf_col_statvars = "param_type"
+                  , my_paired = FALSE
+                  , stat_adj = "none"
+)
--- a/scripts/functions/tests/test_lineage_dist.R
+++ b/scripts/functions/tests/test_lineage_dist.R
@@ -0,0 +1,32 @@
+###############################
+# TEST function lineage_dist.R
+# to plot lineage
+# dist plots with or without facet
+##############################
+getwd()
+setwd("~/git/LSHTM_analysis/scripts/plotting/") # relative source() paths below resolve against this directory
+getwd()
+
+source("Header_TT.R")
+
+source("get_plotting_dfs.R")
+
+cat("cols imported:"
+    , mcsm_red2, mcsm_red1, mcsm_mid, mcsm_blue1, mcsm_blue2)
+
+
+# NOTE(review): no source() of lineage_dist.R is visible here; confirm lineage_distP() and lin_dist_plot come from the files sourced above
+
+lineage_distP(lin_dist_plot
+              , with_facet = F
+              , leg_label = "Mutation Class"
+)
+
+lineage_distP(lin_dist_plot
+              , with_facet = T
+              , facet_wrap_var = "mutation_info_labels"
+              , leg_label = "Mutation Class"
+              , leg_pos_wf = "none"
+              , leg_dir_wf = "horizontal"
+              
+)
--- a/scripts/functions/tests/test_plotting_data.R
+++ b/scripts/functions/tests/test_plotting_data.R
@@ -0,0 +1,35 @@
+#!/usr/bin/env Rscript             
+getwd()
+setwd("~/git/LSHTM_analysis/scripts/functions/") # relative source() paths below resolve against this directory
+getwd()
+#############################################################
+#===========================================
+# load functions, data, dirs, hardcoded vars
+# that will be used in testing the functions
+#===========================================
+source("plotting_globals.R")
+
+drug = "streptomycin"
+gene = "gid"
+
+import_dirs(drug_name = drug, gene_name = gene)
+
+#-------------------------------
+# test function: plotting_data(); its result is a list whose four elements are unpacked below
+#-------------------------------
+source("plotting_data.R")
+
+infile_params = "/home/tanu/git/Data/streptomycin/output/gid_comb_stab_struc_params.csv"
+mcsm_comb_data = read.csv(infile_params, header = T)
+
+pd_df = plotting_data(df = mcsm_comb_data
+                      , ligand_dist_colname = 'ligand_distance' # NOTE(review): test_bp.R in this commit spells this lig_dist_colname - confirm against plotting_data()
+                      , lig_dist_cutoff = 10)
+
+my_df       = pd_df[[1]]
+my_df_u     = pd_df[[2]]
+my_df_u_lig = pd_df[[3]]
+dup_muts    = pd_df[[4]]
+########################################################################
+#                           End of script
+########################################################################